Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp | 150
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp | 252
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 769
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 328
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 1015
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 479
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 833
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CloneModule.cpp | 201
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp | 108
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1122
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp | 165
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 151
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp | 95
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Evaluator.cpp | 597
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp | 482
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp | 923
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 262
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 196
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp | 205
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 2282
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp | 63
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp | 674
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 438
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 565
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Local.cpp | 2210
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 877
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 871
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp | 554
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 873
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp | 1396
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp | 323
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 94
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp | 510
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 531
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp | 108
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp | 161
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp | 271
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 121
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp | 32
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp | 793
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 1000
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 495
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp | 108
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5998
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 765
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp | 152
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 2440
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SplitModule.cpp | 263
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp | 80
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp | 42
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp | 565
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 116
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Utils.cpp | 45
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp | 495
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 1109
55 files changed, 35753 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
new file mode 100644
index 000000000000..df9d5da9e26e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -0,0 +1,150 @@
+//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace llvm {
+
+// We sort the stack variables by alignment (largest first) to minimize
+// unnecessarily large gaps due to alignment.
+// It is tempting to also sort variables by size so that larger variables
+// have larger redzones at both ends. But reordering will make report analysis
+// harder, especially when temporary unnamed variables are present.
+// So, until we can provide more information (type, line number, etc)
+// for the stack variables, we avoid reordering them too much.
+static inline bool CompareVars(const ASanStackVariableDescription &a,
+ const ASanStackVariableDescription &b) {
+ return a.Alignment > b.Alignment;
+}
+
+// We also force minimal alignment for all vars to kMinAlignment so that vars
+// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
+static const size_t kMinAlignment = 16;
+
+// The larger the variable Size, the larger the redzone.
+// The resulting var-plus-redzone size is a multiple of Alignment.
+static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+ size_t Res = 0;
+ if (Size <= 4) Res = 16;
+ else if (Size <= 16) Res = 32;
+ else if (Size <= 128) Res = Size + 32;
+ else if (Size <= 512) Res = Size + 64;
+ else if (Size <= 4096) Res = Size + 128;
+ else Res = Size + 256;
+ return alignTo(Res, Alignment);
+}
+
+ASanStackFrameLayout
+ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ size_t Granularity, size_t MinHeaderSize) {
+ assert(Granularity >= 8 && Granularity <= 64 &&
+ (Granularity & (Granularity - 1)) == 0);
+ assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
+ MinHeaderSize >= Granularity);
+ const size_t NumVars = Vars.size();
+ assert(NumVars > 0);
+ for (size_t i = 0; i < NumVars; i++)
+ Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
+
+ std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+
+ ASanStackFrameLayout Layout;
+ Layout.Granularity = Granularity;
+ Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
+ size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
+ Vars[0].Alignment);
+ assert((Offset % Granularity) == 0);
+ for (size_t i = 0; i < NumVars; i++) {
+ bool IsLast = i == NumVars - 1;
+ size_t Alignment = std::max(Granularity, Vars[i].Alignment);
+ (void)Alignment; // Used only in asserts.
+ size_t Size = Vars[i].Size;
+ assert((Alignment & (Alignment - 1)) == 0);
+ assert(Layout.FrameAlignment >= Alignment);
+ assert((Offset % Alignment) == 0);
+ assert(Size > 0);
+ size_t NextAlignment = IsLast ? Granularity
+ : std::max(Granularity, Vars[i + 1].Alignment);
+ size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
+ Vars[i].Offset = Offset;
+ Offset += SizeWithRedzone;
+ }
+ if (Offset % MinHeaderSize) {
+ Offset += MinHeaderSize - (Offset % MinHeaderSize);
+ }
+ Layout.FrameSize = Offset;
+ assert((Layout.FrameSize % MinHeaderSize) == 0);
+ return Layout;
+}
+
+SmallString<64> ComputeASanStackFrameDescription(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << Vars.size();
+
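+  // The resulting format is "<NumVars>" followed, per variable, by
+  // " <Offset> <Size> <NameLen> <Name>", e.g. "2 16 4 1 a 32 40 1 b".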
+ for (const auto &Var : Vars) {
+ std::string Name = Var.Name;
+ if (Var.Line) {
+ Name += ":";
+ Name += to_string(Var.Line);
+ }
+ StackDescription << " " << Var.Offset << " " << Var.Size << " "
+ << Name.size() << " " << Name;
+ }
+ return StackDescription.str();
+}
+
+SmallVector<uint8_t, 64>
+GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ assert(Vars.size() > 0);
+ SmallVector<uint8_t, 64> SB;
+ SB.clear();
+ const size_t Granularity = Layout.Granularity;
+ SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
+ for (const auto &Var : Vars) {
+ SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
+
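+    // Whole granules of the variable are addressable (shadow byte 0); a
+    // trailing partial granule is encoded as its addressable byte count.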
+ SB.resize(SB.size() + Var.Size / Granularity, 0);
+ if (Var.Size % Granularity)
+ SB.push_back(Var.Size % Granularity);
+ }
+ SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic);
+ return SB;
+}
+
+SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
+ const size_t Granularity = Layout.Granularity;
+
+ for (const auto &Var : Vars) {
+ assert(Var.LifetimeSize <= Var.Size);
+ const size_t LifetimeShadowSize =
+ (Var.LifetimeSize + Granularity - 1) / Granularity;
+ const size_t Offset = Var.Offset / Granularity;
+ std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
+ kAsanStackUseAfterScopeMagic);
+ }
+
+ return SB;
+}
+
+} // llvm namespace
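
To make the layout arithmetic above concrete, here is a small standalone
sketch (illustrative only, not part of the patch; the driver function and
the chosen sizes and alignments are assumed) that lays out two
16-byte-aligned variables with Granularity = 8 and MinHeaderSize = 16:

// Illustrative sketch; field names follow their uses in the file above.
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

void layoutExample() {
  ASanStackVariableDescription A{}, B{};
  A.Name = "a"; A.Size = 4;  A.LifetimeSize = 4;  A.Alignment = 16;
  B.Name = "b"; B.Size = 40; B.LifetimeSize = 40; B.Alignment = 16;
  SmallVector<ASanStackVariableDescription, 2> Vars = {A, B};

  ASanStackFrameLayout L =
      ComputeASanStackFrameLayout(Vars, /*Granularity=*/8,
                                  /*MinHeaderSize=*/16);
  // Offset starts at max(MinHeaderSize, Granularity, Vars[0].Alignment) = 16,
  // so "a" lands at offset 16; the first 16 bytes are the left redzone.
  // "a": VarAndRedzoneSize(4, NextAlignment=16) = alignTo(16, 16) = 16,
  //      so "b" lands at offset 32.
  // "b" is last: VarAndRedzoneSize(40, Granularity=8) = alignTo(72, 8) = 72,
  //      so Offset reaches 104, which is then padded up to a multiple of
  //      MinHeaderSize: L.FrameSize == 112.
  (void)L;
}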
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
new file mode 100644
index 000000000000..4c9746b8c691
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -0,0 +1,252 @@
+//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file adds DWARF discriminators to the IR. Path discriminators are
+// used to decide what CFG path was taken inside sub-graphs whose instructions
+// share the same line and column number information.
+//
+// The main user of this is the sample profiler. Instruction samples are
+// mapped to line number information. Since a single line may be spread
+// out over several basic blocks, discriminators add more precise location
+// for the samples.
+//
+// For example,
+//
+// 1 #define ASSERT(P)
+// 2 if (!(P))
+// 3 abort()
+// ...
+// 100 while (true) {
+// 101 ASSERT (sum < 0);
+// 102 ...
+// 130 }
+//
+// when converted to IR, this snippet looks something like:
+//
+// while.body: ; preds = %entry, %if.end
+// %0 = load i32* %sum, align 4, !dbg !15
+// %cmp = icmp slt i32 %0, 0, !dbg !15
+// br i1 %cmp, label %if.end, label %if.then, !dbg !15
+//
+// if.then: ; preds = %while.body
+// call void @abort(), !dbg !15
+// br label %if.end, !dbg !15
+//
+// Notice that all the instructions in blocks 'while.body' and 'if.then'
+// have exactly the same debug information. When this program is sampled
+// at runtime, the profiler will assume that all these instructions are
+// equally frequent. This, in turn, will make it consider the edge
+// while.body->if.then to be frequently taken (which is incorrect).
+//
+// By adding a discriminator value to the instructions in block 'if.then',
+// we can distinguish instructions at line 101 with discriminator 0 from
+// the instructions at line 101 with discriminator 1.
+//
+// For more details about DWARF discriminators, please visit
+// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "add-discriminators"
+
+namespace {
+// Legacy pass implementation of AddDiscriminators.
+struct AddDiscriminatorsLegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
+ initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char AddDiscriminatorsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool> NoDiscriminators(
+ "no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
+
+// Create the legacy AddDiscriminatorsPass.
+FunctionPass *llvm::createAddDiscriminatorsPass() {
+ return new AddDiscriminatorsLegacyPass();
+}
+
+static bool shouldHaveDiscriminator(const Instruction *I) {
+ return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
+}
+
+/// \brief Assign DWARF discriminators.
+///
+/// To assign discriminators, we examine the boundaries of every
+/// basic block and its successors. Suppose there is a basic block B1
+/// with successor B2. The last instruction I1 in B1 and the first
+/// instruction I2 in B2 are located at the same file and line number.
+/// This situation is illustrated in the following code snippet:
+///
+/// if (i < 10) x = i;
+///
+/// entry:
+/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
+/// if.then:
+/// %1 = load i32* %i.addr, align 4, !dbg !10
+/// store i32 %1, i32* %x, align 4, !dbg !10
+/// br label %if.end, !dbg !10
+/// if.end:
+/// ret void, !dbg !12
+///
+/// Notice how the branch instruction in block 'entry' and all the
+/// instructions in block 'if.then' have the exact same debug location
+/// information (!dbg !10).
+///
+/// To distinguish instructions in block 'entry' from instructions in
+/// block 'if.then', we generate a new lexical block for all the
+/// instructions in block 'if.then' that share the same file and line
+/// location with the last instruction of block 'entry'.
+///
+/// This new lexical block will have the same location information as
+/// the previous one, but with a new DWARF discriminator value.
+///
+/// One of the main uses of this discriminator value is in runtime
+/// sample profilers. It allows the profiler to distinguish instructions
+/// at location !dbg !10 that execute on different basic blocks. This is
+/// important because while the predicate 'if (x < 10)' may have been
+/// executed millions of times, the assignment 'x = i' may have only
+/// executed a handful of times (meaning that the entry->if.then edge is
+/// seldom taken).
+///
+/// If we did not have discriminator information, the profiler would
+/// assign the same weight to both blocks 'entry' and 'if.then', which
+/// in turn will make it conclude that the entry->if.then edge is very
+/// hot.
+///
+/// To decide where to create new discriminator values, this function
+/// traverses the CFG and examines instructions at basic block boundaries.
+/// If the last instruction I1 of a block B1 is at the same file and line
+/// location as instruction I2 of successor B2, then it creates a new
+/// lexical block for I2 and all the instructions in B2 that share the same
+/// file and line location as I2. This new lexical block will have a
+/// different discriminator number than I1.
+static bool addDiscriminators(Function &F) {
+ // If the function has debug information, but the user has disabled
+ // discriminators, do nothing.
+  // Similarly, if the function has no debug info, do nothing.
+ if (NoDiscriminators || !F.getSubprogram())
+ return false;
+
+ bool Changed = false;
+
+ typedef std::pair<StringRef, unsigned> Location;
+ typedef DenseSet<const BasicBlock *> BBSet;
+ typedef DenseMap<Location, BBSet> LocationBBMap;
+ typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
+ typedef DenseSet<Location> LocationSet;
+
+ LocationBBMap LBM;
+ LocationDiscriminatorMap LDM;
+
+ // Traverse all instructions in the function. If the source line location
+  // of the instruction appears in another basic block, assign a new
+ // discriminator for this instruction.
+ for (BasicBlock &B : F) {
+ for (auto &I : B.getInstList()) {
+ // Not all intrinsic calls should have a discriminator.
+ // We want to avoid a non-deterministic assignment of discriminators at
+ // different debug levels. We still allow discriminators on memory
+ // intrinsic calls because those can be early expanded by SROA into
+ // pairs of loads and stores, and the expanded load/store instructions
+ // should have a valid discriminator.
+ if (!shouldHaveDiscriminator(&I))
+ continue;
+ const DILocation *DIL = I.getDebugLoc();
+ if (!DIL)
+ continue;
+ Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
+ auto &BBMap = LBM[L];
+ auto R = BBMap.insert(&B);
+ if (BBMap.size() == 1)
+ continue;
+      // If more than one block shares this line+file location, a
+      // discriminator is needed to distinguish their instructions.
+      // Only the lowest 7 bits are used to represent a discriminator, so
+      // that it fits in a one-byte ULEB128 encoding.
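+      // Each new block observed at this location bumps the discriminator;
+      // subsequent instructions in the same block reuse that value.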
+ unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
+ I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
+ DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
+ Changed = true;
+ }
+ }
+
+ // Traverse all instructions and assign new discriminators to call
+ // instructions with the same lineno that are in the same basic block.
+  // Sample-based profiling needs to distinguish different function calls
+  // within the same source line for correct profile annotation.
+ for (BasicBlock &B : F) {
+ LocationSet CallLocations;
+ for (auto &I : B.getInstList()) {
+ CallInst *Current = dyn_cast<CallInst>(&I);
+ // We bypass intrinsic calls for the following two reasons:
+      // 1) We want to avoid a non-deterministic assignment of
+ // discriminators.
+ // 2) We want to minimize the number of base discriminators used.
+ if (!Current || isa<IntrinsicInst>(&I))
+ continue;
+
+ DILocation *CurrentDIL = Current->getDebugLoc();
+ if (!CurrentDIL)
+ continue;
+ Location L =
+ std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
+ if (!CallLocations.insert(L).second) {
+ unsigned Discriminator = ++LDM[L];
+ Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
+ return addDiscriminators(F);
+}
+PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!addDiscriminators(F))
+ return PreservedAnalyses::all();
+
+ // FIXME: should be all()
+ return PreservedAnalyses::none();
+}
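
For reference, a minimal sketch of driving the new-PM entry point defined
above (the wrapper function and its name are ours, not part of the patch):

#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

PreservedAnalyses runAddDiscriminators(Function &F,
                                       FunctionAnalysisManager &FAM) {
  AddDiscriminatorsPass P;
  // Per the run() above, all analyses are preserved when no discriminator
  // was added.
  return P.run(F, FAM);
}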
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 000000000000..3d5cbfc93f2e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,769 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on basic blocks and on
+// the instructions contained within them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+void llvm::DeleteDeadBlock(BasicBlock *BB) {
+ assert((pred_begin(BB) == pred_end(BB) ||
+ // Can delete self loop.
+ BB->getSinglePredecessor() == BB) && "Block is not dead!");
+ TerminatorInst *BBTerm = BB->getTerminator();
+
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (BasicBlock *Succ : BBTerm->successors())
+ Succ->removePredecessor(BB);
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+    // contained within it must dominate their uses, all of those uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+
+ // Zap the block!
+ BB->eraseFromParent();
+}
+
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
+ MemoryDependenceResults *MemDep) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+
+ PN->eraseFromParent();
+ }
+}
+
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
+ // Recursively deleting a PHI may cause multiple PHIs to be deleted
+ // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
+ SmallVector<WeakTrackingVH, 8> PHIs;
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ bool Changed = false;
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
+
+ return Changed;
+}
+
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
+ LoopInfo *LI,
+ MemoryDependenceResults *MemDep) {
+  // Don't merge away blocks whose address is taken.
+ if (BB->hasAddressTaken()) return false;
+
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
+ if (!PredBB) return false;
+
+ // Don't break self-loops.
+ if (PredBB == BB) return false;
+ // Don't break unwinding instructions.
+ if (PredBB->getTerminator()->isExceptional())
+ return false;
+
+ succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
+ BasicBlock *OnlySucc = BB;
+ for (; SI != SE; ++SI)
+ if (*SI != OnlySucc) {
+ OnlySucc = nullptr; // There are multiple distinct successors!
+ break;
+ }
+
+ // Can't merge if there are multiple successors.
+ if (!OnlySucc) return false;
+
+  // Can't merge if there is a PHI loop.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BI)) {
+ for (Value *IncValue : PN->incoming_values())
+ if (IncValue == PN)
+ return false;
+ } else
+ break;
+ }
+
+ // Begin by getting rid of unneeded PHIs.
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, MemDep);
+
+ // Delete the unconditional branch from the predecessor...
+ PredBB->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(PredBB);
+
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+  // If the predecessor block has no name, inherit the merged block's name.
+ if (!PredBB->hasName())
+ PredBB->takeName(BB);
+
+ // Finally, erase the old block and update dominator info.
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (DomTreeNode *DI : Children)
+ DT->changeImmediateDominator(DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+
+ if (LI)
+ LI->removeBlock(BB);
+
+ if (MemDep)
+ MemDep->invalidateCachedPredecessors();
+
+ BB->eraseFromParent();
+ return true;
+}
+
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Value *V) {
+ Instruction &I = *BI;
+ // Replaces all of the uses of the instruction with uses of the value
+ I.replaceAllUsesWith(V);
+
+ // Make sure to propagate a name if there is one already.
+ if (I.hasName() && !V->hasName())
+ V->takeName(&I);
+
+ // Delete the unnecessary instruction now...
+ BI = BIL.erase(BI);
+}
+
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+ BasicBlock::iterator &BI, Instruction *I) {
+ assert(I->getParent() == nullptr &&
+ "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+ // Copy debug location to newly added instruction, if it wasn't already set
+ // by the caller.
+ if (!I->getDebugLoc())
+ I->setDebugLoc(BI->getDebugLoc());
+
+ // Insert the new instruction into the basic block...
+ BasicBlock::iterator New = BIL.insert(BI, I);
+
+ // Replace all uses of the old instruction, and delete it.
+ ReplaceInstWithValue(BIL, BI, I);
+
+ // Move BI back to point to the newly inserted instruction
+ BI = New;
+}
+
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+ BasicBlock::iterator BI(From);
+ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
+ LoopInfo *LI) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
+
+ // If this is a critical edge, let SplitCriticalEdge do it.
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
+ .setPreserveLCSSA()))
+ return LatchTerm->getSuccessor(SuccNum);
+
+ // If the edge isn't critical, then BB has a single successor or Succ has a
+ // single pred. Split the block.
+ if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+ // If the successor only has a single pred, split the top of the successor
+ // block.
+ assert(SP == BB && "CFG broken");
+ SP = nullptr;
+ return SplitBlock(Succ, &Succ->front(), DT, LI);
+ }
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), DT, LI);
+}
+
+unsigned
+llvm::SplitAllCriticalEdges(Function &F,
+ const CriticalEdgeSplittingOptions &Options) {
+ unsigned NumBroken = 0;
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, Options))
+ ++NumBroken;
+ }
+ return NumBroken;
+}
+
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI) {
+ BasicBlock::iterator SplitIt = SplitPt->getIterator();
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+ ++SplitIt;
+ BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+ // The new block lives in whichever loop the old one did. This preserves
+ // LCSSA as well, because we force the split point to be after any PHI nodes.
+ if (LI)
+ if (Loop *L = LI->getLoopFor(Old))
+ L->addBasicBlockToLoop(New, *LI);
+
+ if (DT)
+ // Old dominates New. New node dominates all other nodes dominated by Old.
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, Old);
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
+ }
+
+ return New;
+}
+
+/// Update DominatorTree, LoopInfo, and LCSSA analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA, bool &HasLoopExit) {
+ // Update dominator tree if available.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // The rest of the logic is only relevant for updating the loop structures.
+ if (!LI)
+ return;
+
+ Loop *L = LI->getLoopFor(OldBB);
+
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
+ bool IsLoopEntry = !!L;
+ bool SplitMakesNewLoopHeader = false;
+ for (BasicBlock *Pred : Preds) {
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L)
+ continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
+ }
+
+ // Unless we have a loop for OldBB, nothing else to do here.
+ if (!L)
+ return;
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = nullptr;
+ for (BasicBlock *Pred : Preds) {
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ L->addBasicBlockToLoop(NewBB, *LI);
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
+ }
+}
+
+/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
+/// This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds, BranchInst *BI,
+ bool HasLoopExit) {
+  // Create a new PHI node in NewBB for each PHI node in OrigBB.
+ SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I++);
+
+ // Check to see if all of the values coming in are the same. If so, we
+ // don't need to create a new PHI node, unless it's needed for LCSSA.
+ Value *InVal = nullptr;
+ if (!HasLoopExit) {
+ InVal = PN->getIncomingValueForBlock(Preds[0]);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!PredSet.count(PN->getIncomingBlock(i)))
+ continue;
+ if (!InVal)
+ InVal = PN->getIncomingValue(i);
+ else if (InVal != PN->getIncomingValue(i)) {
+ InVal = nullptr;
+ break;
+ }
+ }
+ }
+
+ if (InVal) {
+ // If all incoming values for the new PHI would be the same, just don't
+ // make a new PHI. Instead, just remove the incoming values from the old
+ // PHI.
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values
+ // aren't invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
+ if (PredSet.count(PN->getIncomingBlock(i)))
+ PN->removeIncomingValue(i, false);
+
+      // Add a single incoming value to the PHI node for the edge from NewBB.
+ PN->addIncoming(InVal, NewBB);
+ continue;
+ }
+
+ // If the values coming into the block are not the same, we need a new
+ // PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values aren't
+ // invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+ if (PredSet.count(IncomingBB)) {
+ Value *V = PN->removeIncomingValue(i, false);
+ NewPHI->addIncoming(V, IncomingBB);
+ }
+ }
+
+ PN->addIncoming(NewPHI, NewBB);
+ }
+}
+
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ // Do not attempt to split that which cannot be split.
+ if (!BB->canSplitPredecessors())
+ return nullptr;
+
+  // Landing pads need to be handled a bit differently, so delegate that
+  // work to SplitLandingPadPredecessors.
+ if (BB->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ std::string NewName = std::string(Suffix) + ".split-lp";
+
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
+ LI, PreserveLCSSA);
+ return NewBBs[0];
+ }
+
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(
+ BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ }
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (Preds.size() == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
+
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
+ return NewBB;
+}
+
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix1, const char *Suffix2,
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+ BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue;
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB);
+ }
+
+ BasicBlock *NewBB2 = nullptr;
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+ BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (BasicBlock *NewBB2Pred : NewBB2Preds)
+ NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+    // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ PreserveLCSSA, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone();
+ Clone1->setName(Twine("lpad") + Suffix1);
+ NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+ // Create a PHI node for the two cloned landingpad instructions only
+ // if the original landingpad instruction has some uses.
+ if (!LPad->use_empty()) {
+ assert(!LPad->getType()->isTokenTy() &&
+ "Split cannot be applied if LPad is token type. Otherwise an "
+ "invalid PHINode of token type would be created.");
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ }
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i) {
+ Value *V = *i;
+ Instruction *NewBC = nullptr;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ Pred->getInstList().insert(NewRet->getIterator(), NewBC);
+ *i = NewBC;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ *i = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
+}
+
+TerminatorInst *
+llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable, MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+ if (DT) {
+ if (DomTreeNode *OldNode = DT->getNode(Head)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
+ for (DomTreeNode *Child : Children)
+ DT->changeImmediateDominator(Child, NewNode);
+
+ // Head dominates ThenBlock.
+ DT->addNewBlock(ThenBlock, Head);
+ }
+ }
+
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(Head)) {
+ L->addBasicBlockToLoop(ThenBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
+ }
+
+ return CheckTerm;
+}
+
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+ TerminatorInst **ThenTerm,
+ TerminatorInst **ElseTerm,
+ MDNode *BranchWeights) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ *ThenTerm = BranchInst::Create(Tail, ThenBlock);
+ (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ *ElseTerm = BranchInst::Create(Tail, ElseBlock);
+ (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+}
+
+Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+ BasicBlock *Pred1 = nullptr;
+ BasicBlock *Pred2 = nullptr;
+
+ if (SomePHI) {
+ if (SomePHI->getNumIncomingValues() != 2)
+ return nullptr;
+ Pred1 = SomePHI->getIncomingBlock(0);
+ Pred2 = SomePHI->getIncomingBlock(1);
+ } else {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE) // No predecessor
+ return nullptr;
+ Pred1 = *PI++;
+ if (PI == PE) // Only one predecessor
+ return nullptr;
+ Pred2 = *PI++;
+ if (PI != PE) // More than two predecessors
+ return nullptr;
+ }
+
+ // We can only handle branches. Other control flow will be lowered to
+ // branches if possible anyway.
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (!Pred1Br || !Pred2Br)
+ return nullptr;
+
+ // Eliminate code duplication by ensuring that Pred1Br is conditional if
+  // either branch is.
+ if (Pred2Br->isConditional()) {
+ // If both branches are conditional, we don't have an "if statement". In
+ // reality, we could transform this case, but since the condition will be
+ // required anyway, we stand no chance of eliminating it, so the xform is
+ // probably not profitable.
+ if (Pred1Br->isConditional())
+ return nullptr;
+
+ std::swap(Pred1, Pred2);
+ std::swap(Pred1Br, Pred2Br);
+ }
+
+ if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (!Pred2->getSinglePredecessor())
+ return nullptr;
+
+ // If we found a conditional branch predecessor, make sure that it branches
+    // to BB and to Pred2. If it doesn't, this isn't an "if statement".
+ if (Pred1Br->getSuccessor(0) == BB &&
+ Pred1Br->getSuccessor(1) == Pred2) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+ Pred1Br->getSuccessor(1) == BB) {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ } else {
+ // We know that one arm of the conditional goes to BB, so the other must
+ // go somewhere unrelated, and this must not be an "if statement".
+ return nullptr;
+ }
+
+ return Pred1Br->getCondition();
+ }
+
+ // Ok, if we got here, both predecessors end with an unconditional branch to
+ // BB. Don't panic! If both blocks only have a single (identical)
+ // predecessor, and THAT is a conditional branch, then we're all ok!
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor())
+ return nullptr;
+
+ // Otherwise, if this is a conditional branch, then we can use it!
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (!BI) return nullptr;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
+ }
+ return BI->getCondition();
+}
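
As a usage illustration for the splitting utilities above, here is a hedged
sketch (the wrapper name is ours; the default arguments for BranchWeights,
DT, and LI are assumed to be nullptr as declared in BasicBlockUtils.h) that
guards an instruction with a new conditional block:

#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void insertGuard(Value *Cond, Instruction *Inst) {
  // With Unreachable=false, the new then-block ends in a branch back to
  // the continuation block that SplitBlockAndInsertIfThen creates.
  TerminatorInst *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, Inst, /*Unreachable=*/false);
  // Emit the guarded instructions right before the then-block terminator.
  IRBuilder<> IRB(ThenTerm);
  (void)IRB;
}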
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 000000000000..175cbd2ce0df
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,328 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "break-crit-edges"
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+ struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N =
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ NumBroken += N;
+ return N > 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+ };
+}
+
+char BreakCriticalEdges::ID = 0;
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+ return new BreakCriticalEdges();
+}
+
+PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ NumBroken += N;
+ if (N == 0)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
+/// exit block. This function inserts the new PHIs, as needed. Preds is a list
+/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
+/// the old loop exit, now the successor of SplitBB.
+static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+ BasicBlock *SplitBB,
+ BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+ SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block.
+ for (BasicBlock::iterator I = DestBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned Idx = PN->getBasicBlockIndex(SplitBB);
+ Value *V = PN->getIncomingValue(Idx);
+
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(
+ PN->getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+ NewPN->addIncoming(V, Preds[i]);
+
+ // Update the original PHI.
+ PN->setIncomingValue(Idx, NewPN);
+ }
+}
+
+BasicBlock *
+llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options) {
+ if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
+ return nullptr;
+
+ assert(!isa<IndirectBrInst>(TI) &&
+ "Cannot split critical edge from IndirectBrInst");
+
+ BasicBlock *TIBB = TI->getParent();
+ BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+ // Splitting the critical edge to a pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (DestBB->isEHPad()) return nullptr;
+
+ // Create a new basic block, linking it into the CFG.
+ BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
+ TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
+ // Create our unconditional branch.
+ BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
+ NewBI->setDebugLoc(TI->getDebugLoc());
+
+ // Branch to the new block, breaking the edge.
+ TI->setSuccessor(SuccNum, NewBB);
+
+ // Insert the block into the function... right after the block TI lives in.
+ Function &F = *TIBB->getParent();
+ Function::iterator FBBI = TIBB->getIterator();
+ F.getBasicBlockList().insert(++FBBI, NewBB);
+
+ // If there are any PHI nodes in DestBB, we need to update them so that they
+ // merge incoming values from NewBB instead of from TIBB.
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+      // happens because the BB lists of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ }
+
+ // If there are any other edges from TIBB to DestBB, update those to go
+ // through the split block, making those edges non-critical as well (and
+ // reducing the number of phi entries in the DestBB if relevant).
+ if (Options.MergeIdenticalEdges) {
+ for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (TI->getSuccessor(i) != DestBB) continue;
+
+ // Remove an entry for TIBB from DestBB phi nodes.
+ DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
+
+ // We found another edge to DestBB, go to NewBB instead.
+ TI->setSuccessor(i, NewBB);
+ }
+ }
+
+ // If we have nothing to update, just return.
+ auto *DT = Options.DT;
+ auto *LI = Options.LI;
+ if (!DT && !LI)
+ return NewBB;
+
+ // Now update analysis information. Since the only predecessor of NewBB is
+  // TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
+ // anything, as there are other successors of DestBB. However, if all other
+ // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+ // loop header) then NewBB dominates DestBB.
+ SmallVector<BasicBlock*, 8> OtherPreds;
+
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
+ }
+
+ bool NewBBDominatesDestBB = true;
+
+ // Should we update DominatorTree information?
+ if (DT) {
+ DomTreeNode *TINode = DT->getNode(TIBB);
+
+ // The new block is not the immediate dominator for any other nodes, but
+ // TINode is the immediate dominator for the new node.
+ //
+ if (TINode) { // Don't break unreachable code!
+ DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+ DomTreeNode *DestBBNode = nullptr;
+
+ // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+ if (!OtherPreds.empty()) {
+ DestBBNode = DT->getNode(DestBB);
+ while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+ if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+ NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+ OtherPreds.pop_back();
+ }
+ OtherPreds.clear();
+ }
+
+ // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+ // doesn't dominate anything.
+ if (NewBBDominatesDestBB) {
+ if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+ DT->changeImmediateDominator(DestBBNode, NewBBNode);
+ }
+ }
+ }
+
+ // Update LoopInfo if it is around.
+ if (LI) {
+ if (Loop *TIL = LI->getLoopFor(TIBB)) {
+      // If one block or the other is not in a loop, the new block is not in
+      // a loop either, and thus LI doesn't need to be updated.
+ if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+ if (TIL == DestLoop) {
+          // Both are in the same loop; NewBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NewBB, *LI);
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == DestBB &&
+ "Should not create irreducible loops!");
+ if (Loop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, *LI);
+ }
+ }
+
+ // If TIBB is in a loop and DestBB is outside of that loop, we may need
+ // to update LoopSimplify form and LCSSA form.
+ if (!TIL->contains(DestBB)) {
+ assert(!TIL->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (Options.PreserveLCSSA) {
+ createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+ }
+
+      // The only way that we can break LoopSimplify form by splitting a
+      // critical edge is if after the split there exists some edge from TIL to
+      // DestBB *and* the only edge into DestBB from outside of TIL is that of
+      // NewBB. If the first isn't true, then LoopSimplify still holds: NewBB
+      // is the new exit block and it has no non-loop predecessors. If the
+      // second isn't true, then DestBB was not in LoopSimplify form prior to
+      // the split as it had a non-loop predecessor. In both of these cases,
+      // the predecessor must be directly in TIL, not in a subloop, or again
+      // LoopSimplify doesn't hold.
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
+ ++I) {
+ BasicBlock *P = *I;
+ if (P == NewBB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != TIL) {
+ // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear();
+ break;
+ }
+ LoopPreds.push_back(P);
+ }
+ if (!LoopPreds.empty()) {
+ assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
+ }
+ }
+ }
+ }
+
+ return NewBB;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 000000000000..b60dfb4f3541
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,1015 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "build-libcalls"
+
+//- Infer Attributes ---------------------------------------------------------//
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setOnlyAccessesArgMemory(Function &F) {
+ if (F.onlyAccessesArgMemory())
+ return false;
+ F.setOnlyAccessesArgMemory();
+ ++NumArgMemOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setRetDoesNotAlias(Function &F) {
+ if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias))
+ return false;
+ F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ ++NumNoAlias;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoCapture))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoCapture);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::ReadOnly);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setRetNonNull(Function &F) {
+ assert(F.getReturnType()->isPointerTy() &&
+ "nonnull applies only to pointers");
+ if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
+ return false;
+ F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ ++NumNonNull;
+ return true;
+}
+
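+// As an illustration (a sketch, not tied to any particular target): after
+// running inferLibFuncAttributes on a declaration of strlen, the declaration
+// would read roughly
+//
+//   declare i64 @strlen(i8* nocapture) argmemonly nounwind readonly
+//
+// assuming a 64-bit size_t.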
+bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
+ LibFunc TheLibFunc;
+ if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+ switch (TheLibFunc) {
+ case LibFunc_strlen:
+ case LibFunc_wcslen:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_strcpy:
+ case LibFunc_stpcpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ case LibFunc_strncpy:
+ case LibFunc_stpncpy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_strxfrm:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_strcmp: // 0,1
+ case LibFunc_strspn: // 0,1
+ case LibFunc_strncmp: // 0,1
+ case LibFunc_strcspn: // 0,1
+ case LibFunc_strcoll: // 0,1
+ case LibFunc_strcasecmp: // 0,1
+  case LibFunc_strncasecmp: // 0,1
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_strstr:
+ case LibFunc_strpbrk:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_scanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_strdup:
+ case LibFunc_strndup:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_stat:
+ case LibFunc_statvfs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_sscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_sprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_snprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc_setitimer:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_system:
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_malloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_memcmp:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memccpy:
+ case LibFunc_memmove:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_memcpy_chk:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc_memalign:
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_mkdir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_mktime:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_realloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_read:
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_rewind:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_rename:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_readlink:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_write:
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_bcopy:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_bcmp:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_bzero:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_calloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_chmod:
+ case LibFunc_chown:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_ctermid:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atof:
+ case LibFunc_atoll:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_access:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_fopen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_fdopen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_feof:
+ case LibFunc_free:
+ case LibFunc_fseek:
+ case LibFunc_ftell:
+ case LibFunc_fgetc:
+ case LibFunc_fseeko:
+ case LibFunc_ftello:
+ case LibFunc_fileno:
+ case LibFunc_fflush:
+ case LibFunc_fclose:
+ case LibFunc_fsetpos:
+ case LibFunc_flockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_ftrylockfile:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_ferror:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc_fputc:
+ case LibFunc_fstat:
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ case LibFunc_fstatvfs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_fgets:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc_fread:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc_fwrite:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 3);
+ // FIXME: readonly #1?
+ return Changed;
+ case LibFunc_fputs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_fscanf:
+ case LibFunc_fprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_fgetpos:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_getc:
+ case LibFunc_getlogin_r:
+ case LibFunc_getc_unlocked:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_getenv:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_gets:
+ case LibFunc_getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc_getitimer:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_getpwnam:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_ungetc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_uname:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_unlink:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_unsetenv:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_utime:
+ case LibFunc_utimes:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_putc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_puts:
+ case LibFunc_printf:
+ case LibFunc_perror:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_pread:
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_pwrite:
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc_popen:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_pclose:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_vscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_vsscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_vfscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_valloc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_vprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_vsnprintf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc_open:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_opendir:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_tmpfile:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_times:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_htonl:
+ case LibFunc_htons:
+ case LibFunc_ntohl:
+ case LibFunc_ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc_lstat:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_lchown:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_qsort:
+    // May throw; calls back through a user-supplied comparison function.
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc_dunder_strdup:
+ case LibFunc_dunder_strndup:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_dunder_strtok_r:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_under_IO_getc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_under_IO_putc:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_dunder_isoc99_scanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_dunder_isoc99_sscanf:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_fopen64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 0);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc_fseeko64:
+ case LibFunc_ftello64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ return Changed;
+ case LibFunc_tmpfile64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_open64:
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setOnlyReadsMemory(F, 0);
+ return Changed;
+ case LibFunc_gettimeofday:
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc_Znwj: // new(unsigned int)
+ case LibFunc_Znwm: // new(unsigned long)
+ case LibFunc_Znaj: // new[](unsigned int)
+ case LibFunc_Znam: // new[](unsigned long)
+ case LibFunc_msvc_new_int: // new(unsigned int)
+ case LibFunc_msvc_new_longlong: // new(unsigned long long)
+ case LibFunc_msvc_new_array_int: // new[](unsigned int)
+ case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
+ // Operator new always returns a nonnull noalias pointer
+ Changed |= setRetNonNull(F);
+ Changed |= setRetDoesNotAlias(F);
+ return Changed;
+ //TODO: add LibFunc entries for:
+ //case LibFunc_memset_pattern4:
+ //case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern16:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotCapture(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ // int __nvvm_reflect(const char *)
+ case LibFunc_nvvm_reflect:
+ Changed |= setDoesNotAccessMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
+
+//- Emit LibCalls ------------------------------------------------------------//
+
+Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
+}
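+
+// For example, castToCStr turns a pointer such as [13 x i8]* (the type of a
+// small string constant) into an i8* in the same address space, which is the
+// form the libcall prototypes below expect.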
+
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_strlen))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
+ B.getInt8PtrTy());
+ inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
+ CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
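+
+// A hypothetical usage sketch for the emitters in this file: callers position
+// an IRBuilder at the insertion point and treat a nullptr result as "the
+// target has no usable libcall". For example (Str, OldCall, and TLI are
+// placeholders):
+//
+//   IRBuilder<> B(OldCall);
+//   const DataLayout &DL = OldCall->getModule()->getDataLayout();
+//   if (Value *Len = emitStrLen(Str, B, DL, TLI))
+//     OldCall->replaceAllUsesWith(Len);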
+
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_strchr))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Type *I8Ptr = B.getInt8PtrTy();
+ Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr =
+ M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty);
+ inferLibFuncAttributes(*M->getFunction("strchr"), *TLI);
+ CallInst *CI = B.CreateCall(
+ StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_strncmp))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context));
+ inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI);
+ CallInst *CI = B.CreateCall(
+ StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc_strcpy))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
+ inferLibFuncAttributes(*M->getFunction(Name), *TLI);
+ CallInst *CI =
+ B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc_strncpy))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
+ Len->getType());
+ inferLibFuncAttributes(*M->getFunction(Name), *TLI);
+ CallInst *CI = B.CreateCall(
+ StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy");
+ if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_memcpy_chk))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ AttributeList AS;
+ AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction(
+ "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context));
+ Dst = castToCStr(Dst, B);
+ Src = castToCStr(Src, B);
+ CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_memchr))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(),
+ DL.getIntPtrType(Context));
+ inferLibFuncAttributes(*M->getFunction("memchr"), *TLI);
+ CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_memcmp))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context));
+ inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI);
+ CallInst *CI = B.CreateCall(
+ MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// Append a suffix to the function name according to the type of 'Op'.
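+/// For example, "sin" is left unchanged for a double operand, and becomes
+/// "sinf" for a float operand or "sinl" for any other floating-point type.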
+static void appendTypeSuffix(Value *Op, StringRef &Name,
+ SmallString<20> &NameBuffer) {
+ if (!Op->getType()->isDoubleTy()) {
+ NameBuffer += Name;
+
+ if (Op->getType()->isFloatTy())
+ NameBuffer += 'f';
+ else
+ NameBuffer += 'l';
+
+ Name = NameBuffer;
+ }
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op, Name, NameBuffer);
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType());
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ // The incoming attribute set may have come from a speculatable intrinsic, but
+ // is being replaced with a library call which is not allowed to be
+ // speculatable.
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(),
+ AttributeList::FunctionIndex,
+ Attribute::Speculatable));
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+ IRBuilder<> &B, const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op1, Name, NameBuffer);
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
+ Op2->getType());
+ CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_putchar))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty());
+ inferLibFuncAttributes(*M->getFunction("putchar"), *TLI);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_puts))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Value *PutS =
+ M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy());
+ inferLibFuncAttributes(*M->getFunction("puts"), *TLI);
+ CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fputc))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(),
+ File->getType());
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction("fputc"), *TLI);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall(F, {Char, File}, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fputs))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ StringRef FPutsName = TLI->getName(LibFunc_fputs);
+ Constant *F = M->getOrInsertFunction(
+ FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI);
+ CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
+
+Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc_fwrite))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteName = TLI->getName(LibFunc_fwrite);
+ Constant *F = M->getOrInsertFunction(
+ FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+
+ if (File->getType()->isPointerTy())
+ inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI);
+ CallInst *CI =
+ B.CreateCall(F, {castToCStr(Ptr, B), Size,
+ ConstantInt::get(DL.getIntPtrType(Context), 1), File});
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+ return CI;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 000000000000..83ec7f55d1af
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,479 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom, 32-bit divides are slow enough that at run
+// time it is profitable to check the values of the operands and, if they are
+// positive and less than 256, use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bypass-slow-division"
+
+namespace {
+ struct DivOpInfo {
+ bool SignedOp;
+ Value *Dividend;
+ Value *Divisor;
+
+ DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+ : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+ };
+
+ struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
+
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+
+ /// A quotient and remainder, plus a BB from which they logically "originate".
+ /// If you use Quotient or Remainder in a Phi node, you should use BB as its
+ /// corresponding predecessor.
+ struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<DivOpInfo> {
+ static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+ return Val1.SignedOp == Val2.SignedOp &&
+ Val1.Dividend == Val2.Dividend &&
+ Val1.Divisor == Val2.Divisor;
+ }
+
+ static DivOpInfo getEmptyKey() {
+ return DivOpInfo(false, nullptr, nullptr);
+ }
+
+ static DivOpInfo getTombstoneKey() {
+ return DivOpInfo(true, nullptr, nullptr);
+ }
+
+ static unsigned getHashValue(const DivOpInfo &Val) {
+ return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+ reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ (unsigned)Val.SignedOp;
+ }
+ };
+
+ typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
+ typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
+ typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
+}
+
+namespace {
+enum ValueRange {
+ /// Operand definitely fits into BypassType. No runtime checks are needed.
+ VALRNG_KNOWN_SHORT,
+ /// A runtime check is required, as value range is unknown.
+ VALRNG_UNKNOWN,
+ /// Operand is unlikely to fit into BypassType. The bypassing should be
+ /// disabled.
+ VALRNG_LIKELY_LONG
+};
+
+class FastDivInsertionTask {
+ bool IsValidTask = false;
+ Instruction *SlowDivOrRem = nullptr;
+ IntegerType *BypassType = nullptr;
+ BasicBlock *MainBB = nullptr;
+
+ bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
+ ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
+ QuotRemWithBB createSlowBB(BasicBlock *Successor);
+ QuotRemWithBB createFastBB(BasicBlock *Successor);
+ QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
+ BasicBlock *PhiBB);
+ Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
+ Optional<QuotRemPair> insertFastDivAndRem();
+
+ bool isSignedOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::SRem;
+ }
+ bool isDivisionOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::UDiv;
+ }
+ Type *getSlowType() { return SlowDivOrRem->getType(); }
+
+public:
+ FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+ Value *getReplacement(DivCacheTy &Cache);
+};
+} // anonymous namespace
+
+FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
+ const BypassWidthsTy &BypassWidths) {
+ switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ SlowDivOrRem = I;
+ break;
+ default:
+ // I is not a div/rem operation.
+ return;
+ }
+
+ // Skip division on vector types. Only optimize integer instructions.
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
+ if (!SlowType)
+ return;
+
+ // Skip if this bitwidth is not bypassed.
+ auto BI = BypassWidths.find(SlowType->getBitWidth());
+ if (BI == BypassWidths.end())
+ return;
+
+ // Get type for div/rem instruction with bypass bitwidth.
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
+ BypassType = BT;
+
+ // The original basic block.
+ MainBB = I->getParent();
+
+ // The instruction is indeed a slow div or rem operation.
+ IsValidTask = true;
+}
+
+/// Reuses a previously-computed quotient or remainder from the current BB if
+/// the operands and operation are identical. Otherwise calls
+/// insertFastDivAndRem to perform the optimization and caches the resulting
+/// quotient and remainder. If no replacement can be generated, nullptr is
+/// returned.
+Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
+ // First, make sure that the task is valid.
+ if (!IsValidTask)
+ return nullptr;
+
+ // Then, look for a value in Cache.
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+ auto CacheI = Cache.find(Key);
+
+ if (CacheI == Cache.end()) {
+ // If previous instance does not exist, try to insert fast div.
+ Optional<QuotRemPair> OptResult = insertFastDivAndRem();
+ // Bail out if insertFastDivAndRem has failed.
+ if (!OptResult)
+ return nullptr;
+ CacheI = Cache.insert({Key, *OptResult}).first;
+ }
+
+ QuotRemPair &Value = CacheI->second;
+ return isDivisionOp() ? Value.Quotient : Value.Remainder;
+}
+
+/// \brief Check if a value looks like a hash.
+///
+/// The routine is expected to detect values computed using the most common hash
+/// algorithms. Typically, hash computations end with one of the following
+/// instructions:
+///
+/// 1) MUL with a constant wider than BypassType
+/// 2) XOR instruction
+///
+/// And even if we are wrong and the value is not a hash, it is still quite
+/// unlikely that such values will fit into BypassType.
+///
+/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
+/// It is implemented as a depth-first search for values that look neither long
+/// nor hash-like.
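+///
+/// For example (a hypothetical FNV-1-style loop, shown as IR with a 32-bit
+/// BypassType in mind):
+///
+///   loop:
+///     %h = phi i64 [ 14695981039346656037, %entry ], [ %h.next, %loop ]
+///     %m = mul i64 %h, 1099511628211
+///     %h.next = xor i64 %m, %c
+///
+/// The mul by a constant wider than BypassType and the xor both mark the
+/// value as hash-like.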
+bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Xor:
+ return true;
+ case Instruction::Mul: {
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ Value *Op1 = I->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(Op1);
+ if (!C && isa<BitCastInst>(Op1))
+ C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
+ return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+ }
+ case Instruction::PHI: {
+    // Stop IR traversal in case of pathological input code. This limits the
+    // recursion depth.
+ if (Visited.size() >= 16)
+ return false;
+    // Do not visit nodes that have been visited already. Return true in that
+    // case, since no value that looks non-hash-like was found along this path.
+ if (Visited.find(I) != Visited.end())
+ return true;
+ Visited.insert(I);
+ return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
+ // Ignore undef values as they probably don't affect the division
+ // operands.
+ return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
+ isa<UndefValue>(V);
+ });
+ }
+ default:
+ return false;
+ }
+}
+
+/// Check if an integer value fits into our bypass type.
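+///
+/// For example (a sketch with a 64-bit value and a 32-bit BypassType): if
+/// computeKnownBits proves at least 32 leading zero bits, the value is known
+/// short; if it proves that fewer than 32 leading zeros are even possible,
+/// some high bit is known set, so the value cannot fit and is likely long.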
+ValueRange FastDivInsertionTask::getValueRange(Value *V,
+ VisitedSetTy &Visited) {
+ unsigned ShortLen = BypassType->getBitWidth();
+ unsigned LongLen = V->getType()->getIntegerBitWidth();
+
+ assert(LongLen > ShortLen && "Value type must be wider than BypassType");
+ unsigned HiBits = LongLen - ShortLen;
+
+ const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
+ KnownBits Known(LongLen);
+
+ computeKnownBits(V, Known, DL);
+
+ if (Known.countMinLeadingZeros() >= HiBits)
+ return VALRNG_KNOWN_SHORT;
+
+ if (Known.countMaxLeadingZeros() < HiBits)
+ return VALRNG_LIKELY_LONG;
+
+ // Long integer divisions are often used in hashtable implementations. It's
+ // not worth bypassing such divisions because hash values are extremely
+ // unlikely to have enough leading zeros. The call below tries to detect
+ // values that are unlikely to fit BypassType (including hashes).
+ if (isHashLikeValue(V, Visited))
+ return VALRNG_LIKELY_LONG;
+
+ return VALRNG_UNKNOWN;
+}
+
+/// Add a new basic block for the slow div and rem operations and insert it
+/// before SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isSignedOp()) {
+ DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
+ } else {
+ DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
+ }
+
+ Builder.CreateBr(SuccessorBB);
+ return DivRemPair;
+}
+
+/// Add a new basic block for the fast div and rem operations and insert it
+/// before SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ Value *ShortDivisorV =
+ Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+ Value *ShortDividendV =
+ Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+ // udiv/urem because this optimization only handles positive numbers.
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
+ Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
+ DivRemPair.Quotient =
+ Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
+ DivRemPair.Remainder =
+ Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
+ Builder.CreateBr(SuccessorBB);
+
+ return DivRemPair;
+}
+
+/// Creates PHI nodes for the div and rem results.
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
+ QuotRemWithBB &RHS,
+ BasicBlock *PhiBB) {
+ IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
+ QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
+ QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
+ PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
+ RemPhi->addIncoming(LHS.Remainder, LHS.BB);
+ RemPhi->addIncoming(RHS.Remainder, RHS.BB);
+ return QuotRemPair(QuoPhi, RemPhi);
+}
+
+/// Creates a runtime check to test whether both the divisor and dividend fit
+/// into BypassType. The check is inserted at the end of MainBB. A true result
+/// means that the operands fit. Either operand may be null if it does not
+/// need a runtime check.
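+///
+/// For instance (a sketch for i64 operands with an i32 BypassType), the
+/// emitted check is equivalent to
+///
+///   ((Op1 | Op2) & 0xffffffff00000000) == 0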
+Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
+ assert((Op1 || Op2) && "Nothing to check");
+ IRBuilder<> Builder(MainBB, MainBB->end());
+
+ Value *OrV;
+ if (Op1 && Op2)
+ OrV = Builder.CreateOr(Op1, Op2);
+ else
+ OrV = Op1 ? Op1 : Op2;
+
+  // The bypass type's bit mask is inverted so that the check below detects
+  // any bits set above the bypass type's width.
+ uint64_t BitMask = ~BypassType->getBitMask();
+ Value *AndV = Builder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values
+ Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
+ return Builder.CreateICmpEQ(AndV, ZeroV);
+}
+
+/// Substitutes the div/rem instruction with code that checks the value of the
+/// operands and uses a shorter, faster div/rem instruction when possible.
+Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor)) {
+ // Keep division by a constant for DAGCombiner.
+ return None;
+ }
+
+ VisitedSetTy SetL;
+ ValueRange DividendRange = getValueRange(Dividend, SetL);
+ if (DividendRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ VisitedSetTy SetR;
+ ValueRange DivisorRange = getValueRange(Divisor, SetR);
+ if (DivisorRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
+ bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
+
+ if (DividendShort && DivisorShort) {
+ // If both operands are known to be short then just replace the long
+ // division with a short one in-place.
+
+ IRBuilder<> Builder(SlowDivOrRem);
+ Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
+ Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
+ Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
+ Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
+ Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
+ Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
+ return QuotRemPair(ExtDiv, ExtRem);
+ } else if (DividendShort && !isSignedOp()) {
+ // If the division is unsigned and Dividend is known to be short, then
+ // either
+ // 1) Divisor is less or equal to Dividend, and the result can be computed
+ // with a short division.
+ // 2) Divisor is greater than Dividend. In this case, no division is needed
+ // at all: The quotient is 0 and the remainder is equal to Dividend.
+ //
+ // So instead of checking at runtime whether Divisor fits into BypassType,
+ // we emit a runtime check to differentiate between these two cases. This
+ // lets us entirely avoid a long div.
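+    //
+    // A sketch of the resulting IR (block and value names hypothetical, for
+    // an i64 division with an i32 BypassType):
+    //
+    //   mainBB:
+    //     %cmp = icmp uge i64 %dividend, %divisor
+    //     br i1 %cmp, label %fast, label %succ
+    //   fast:
+    //     ; i32 udiv/urem on truncated operands, results zext'ed to i64
+    //     br label %succ
+    //   succ:
+    //     %q = phi i64 [ %q.zext, %fast ], [ 0, %mainBB ]
+    //     %r = phi i64 [ %r.zext, %fast ], [ %dividend, %mainBB ]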
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Long;
+ Long.BB = MainBB;
+ Long.Quotient = ConstantInt::get(getSlowType(), 0);
+ Long.Remainder = Dividend;
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
+ Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
+ return Result;
+ } else {
+ // General case. Create both slow and fast div/rem pairs and choose one of
+ // them at runtime.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemWithBB Slow = createSlowBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
+ Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
+ DivisorShort ? nullptr : Divisor);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+ return Result;
+ }
+}
+
+/// This optimization identifies DIV/REM instructions in a BB that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
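+///
+/// A hypothetical usage sketch: a target that wants 64-bit div/rem bypassed
+/// by 32-bit operations when both operands fit might run
+///
+///   BypassWidthsTy Widths;
+///   Widths[64] = 32;
+///   for (BasicBlock &BB : F)
+///     Changed |= bypassSlowDivision(&BB, Widths);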
+bool llvm::bypassSlowDivision(BasicBlock *BB,
+ const BypassWidthsTy &BypassWidths) {
+ DivCacheTy PerBBDivCache;
+
+ bool MadeChange = false;
+ Instruction* Next = &*BB->begin();
+ while (Next != nullptr) {
+ // We may add instructions immediately after I, but we want to skip over
+ // them.
+ Instruction* I = Next;
+ Next = Next->getNextNode();
+
+ FastDivInsertionTask Task(I, BypassWidths);
+ if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+
+ // Above we eagerly create divs and rems, as pairs, so that we can efficiently
+ // create divrem machine instructions. Now erase any unused divs / rems so we
+ // don't leave extra instructions sitting around.
+ for (auto &KV : PerBBDivCache)
+ for (Value *V : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 000000000000..7e75e8847785
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,833 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner. This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <map>
+using namespace llvm;
+
+/// See comments in Cloning.h.
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, Function *F,
+ ClonedCodeInfo *CodeInfo,
+ DebugInfoFinder *DIFinder) {
+ DenseMap<const MDNode *, MDNode *> Cache;
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+ Module *TheModule = F ? F->getParent() : nullptr;
+
+ // Loop over all instructions, and copy them over.
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ if (DIFinder && TheModule) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(II))
+ DIFinder->processDeclare(*TheModule, DDI);
+ else if (auto *DVI = dyn_cast<DbgValueInst>(II))
+ DIFinder->processValue(*TheModule, DVI);
+
+ if (auto DbgLoc = II->getDebugLoc())
+ DIFinder->processLocation(*TheModule, DbgLoc.get());
+ }
+
+ Instruction *NewInst = II->clone();
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[&*II] = NewInst; // Add instruction map to value.
+
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->getEntryBlock();
+ }
+ return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+//
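+// A hypothetical caller-side sketch: every source argument must be mapped in
+// VMap before the call (see the assert below), e.g.
+//
+//   ValueToValueMapTy VMap;
+//   for (const Argument &A : OldF->args())
+//     VMap[&A] = GetMappedValue(&A); // GetMappedValue is a placeholder.
+//   SmallVector<ReturnInst *, 8> Returns;
+//   CloneFunctionInto(NewF, OldF, VMap, /*ModuleLevelChanges=*/false,
+//                     Returns, ".clone");
+//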
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+ for (const Argument &I : OldFunc->args())
+ assert(VMap.count(&I) && "No mapping from source argument specified!");
+#endif
+
+ // Copy all attributes other than those stored in the AttributeList. We need
+ // to remap the parameter indices of the AttributeList.
+ AttributeList NewAttrs = NewFunc->getAttributes();
+ NewFunc->copyAttributesFrom(OldFunc);
+ NewFunc->setAttributes(NewAttrs);
+
+ // Fix up the personality function that got copied over.
+ if (OldFunc->hasPersonalityFn())
+ NewFunc->setPersonalityFn(
+ MapValue(OldFunc->getPersonalityFn(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+
+ SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
+ AttributeList OldAttrs = OldFunc->getAttributes();
+
+ // Clone any argument attributes that are present in the VMap.
+ for (const Argument &OldArg : OldFunc->args()) {
+ if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
+ NewArgAttrs[NewArg->getArgNo()] =
+ OldAttrs.getParamAttributes(OldArg.getArgNo());
+ }
+ }
+
+ NewFunc->setAttributes(
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
+ OldAttrs.getRetAttributes(), NewArgAttrs));
+
+ bool MustCloneSP =
+ OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
+ DISubprogram *SP = OldFunc->getSubprogram();
+ if (SP) {
+ assert(!MustCloneSP || ModuleLevelChanges);
+ // Add mappings for some DebugInfo nodes that we don't want duplicated
+ // even if they're distinct.
+ auto &MD = VMap.MD();
+ MD[SP->getUnit()].reset(SP->getUnit());
+ MD[SP->getType()].reset(SP->getType());
+ MD[SP->getFile()].reset(SP->getFile());
+    // If we're not cloning into the same module, there is no need to clone
+    // the subprogram.
+ if (!MustCloneSP)
+ MD[SP].reset(SP);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ OldFunc->getAllMetadata(MDs);
+ for (auto MD : MDs) {
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
+ }
+
+ // When we remap instructions, we want to avoid duplicating inlined
+ // DISubprograms, so record all subprograms we find as we duplicate
+ // instructions and then freeze them in the MD map.
+ // We also record information about dbg.value and dbg.declare to avoid
+ // duplicating the types.
+ DebugInfoFinder DIFinder;
+
+ // Loop over all of the basic blocks in the function, cloning them as
+ // appropriate. Note that we save BE this way in order to handle cloning of
+ // recursive functions into themselves.
+ //
+ for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+ BI != BE; ++BI) {
+ const BasicBlock &BB = *BI;
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
+ SP ? &DIFinder : nullptr);
+
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+ Returns.push_back(RI);
+ }
+
+ for (DISubprogram *ISP : DIFinder.subprograms()) {
+ if (ISP != SP) {
+ VMap.MD()[ISP].reset(ISP);
+ }
+ }
+
+ for (auto *Type : DIFinder.types()) {
+ VMap.MD()[Type].reset(Type);
+ }
+
+ // Loop over all of the instructions in the function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator BB =
+ cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+ BE = NewFunc->end();
+ BB != BE; ++BB)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &II : *BB)
+ RemapInstruction(&II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+}
+
+/// Return a copy of the specified function and add it to that function's
+/// module. Also, any references specified in the VMap are changed to refer to
+/// their mapped value instead of the original one. If any of the arguments to
+/// the function are in the VMap, the arguments are deleted from the resultant
+/// function. The VMap is updated to include mappings from all of the
+/// instructions and basic blocks in the function from their old to new values.
+///
+Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
+ ClonedCodeInfo *CodeInfo) {
+ std::vector<Type*> ArgTypes;
+
+  // The user might be deleting arguments to the function by specifying them in
+  // the VMap. If so, we should not add those arguments to the argument type
+  // vector.
+  //
+ for (const Argument &I : F->args())
+ if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
+ ArgTypes.push_back(I.getType());
+
+ // Create a new function type...
+ FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+ ArgTypes, F->getFunctionType()->isVarArg());
+
+ // Create the new function...
+ Function *NewF =
+ Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
+
+ // Loop over the arguments, copying the names of the mapped arguments over...
+ Function::arg_iterator DestI = NewF->arg_begin();
+ for (const Argument & I : F->args())
+ if (VMap.count(&I) == 0) { // Is this argument preserved?
+ DestI->setName(I.getName()); // Copy the name over...
+ VMap[&I] = &*DestI++; // Add mapping to VMap
+ }
+
+ SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
+ CodeInfo);
+
+ return NewF;
+}
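+
+// For example (a sketch; `F` is an existing function in some module):
+//
+//   ValueToValueMapTy VMap;
+//   Function *NewF = CloneFunction(F, VMap);
+//   // NewF now lives next to F in the same module, and VMap maps every
+//   // argument, basic block, and instruction of F to its clone in NewF.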
+
+
+
+namespace {
+ /// This is a private class used to implement CloneAndPruneFunctionInto.
+ struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+
+ public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo) {}
+
+ /// The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock*> &ToClone);
+ };
+}
+
+/// The specified block is found to be reachable, clone it and
+/// anything that it can reach.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock*> &ToClone){
+ WeakTrackingVH &BBEntry = VMap[BB];
+
+ // Have we already cloned this block?
+ if (BBEntry) return;
+
+ // Nope, clone it now.
+ BasicBlock *NewBB;
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext());
+ if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+ bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+
+ // Loop over all instructions, and copy them over, DCE'ing as we go. This
+ // loop doesn't include the terminator.
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
+ II != IE; ++II) {
+
+ Instruction *NewInst = II->clone();
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG.
+ if (!isa<PHINode>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V =
+ SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ if (!NewInst->mayHaveSideEffects()) {
+ VMap[&*II] = V;
+ NewInst->deleteValue();
+ continue;
+ }
+ }
+ }
+
+ if (II->hasName())
+ NewInst->setName(II->getName()+NameSuffix);
+ VMap[&*II] = NewInst; // Add instruction map to value.
+ NewBB->getInstList().push_back(NewInst);
+ hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(&*II))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (isa<ConstantInt>(AI->getArraySize()))
+ hasStaticAllocas = true;
+ else
+ hasDynamicAllocas = true;
+ }
+ }
+
+ // Finally, clone over the terminator.
+ const TerminatorInst *OldTI = BB->getTerminator();
+ bool TerminatorDone = false;
+ if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+ if (BI->isConditional()) {
+ // If the condition was a known constant in the callee...
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ // Or is a known constant in the caller...
+ if (!Cond) {
+ Value *V = VMap.lookup(BI->getCondition());
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+
+ // Constant fold to uncond branch!
+ if (Cond) {
+ BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+ } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+    // If the switch condition was a known constant in the callee...
+    ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+    if (!Cond) { // Or is a known constant in the caller...
+ Value *V = VMap.lookup(SI->getCondition());
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
+ if (Cond) { // Constant fold to uncond branch!
+ SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ ToClone.push_back(Dest);
+ TerminatorDone = true;
+ }
+ }
+
+ if (!TerminatorDone) {
+ Instruction *NewInst = OldTI->clone();
+ if (OldTI->hasName())
+ NewInst->setName(OldTI->getName()+NameSuffix);
+ NewBB->getInstList().push_back(NewInst);
+ VMap[OldTI] = NewInst; // Add instruction map to value.
+
+ if (CodeInfo)
+ if (auto CS = ImmutableCallSite(OldTI))
+ if (CS.hasOperandBundles())
+ CodeInfo->OperandBundleCallSites.push_back(NewInst);
+
+ // Recursively clone any reachable successor blocks.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (const BasicBlock *Succ : TI->successors())
+ ToClone.push_back(Succ);
+ }
+
+ if (CodeInfo) {
+ CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+ CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
+ BB != &BB->getParent()->front();
+ }
+}
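+
+// To illustrate the pruning above: if the callee contains
+//
+//   br i1 %flag, label %then, label %else
+//
+// and %flag maps to `true` in VMap (say, from a constant actual argument),
+// the cloner emits an unconditional `br label %then` and only queues %then
+// for cloning, so the code under %else is never copied at all.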
+
+/// This works like CloneAndPruneFunctionInto, except that it does not clone the
+/// entire function. Instead it starts at an instruction provided by the caller
+/// and copies (and prunes) only the code reachable from that instruction.
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+ const Instruction *StartingInst,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo) {
+ assert(NameSuffix && "NameSuffix cannot be null!");
+
+ ValueMapTypeRemapper *TypeMapper = nullptr;
+ ValueMaterializer *Materializer = nullptr;
+
+#ifndef NDEBUG
+ // If the cloning starts at the beginning of the function, verify that
+ // the function arguments are mapped.
+ if (!StartingInst)
+ for (const Argument &II : OldFunc->args())
+ assert(VMap.count(&II) && "No mapping from source argument specified!");
+#endif
+
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ NameSuffix, CodeInfo);
+ const BasicBlock *StartingBB;
+ if (StartingInst)
+ StartingBB = StartingInst->getParent();
+ else {
+ StartingBB = &OldFunc->getEntryBlock();
+ StartingInst = &StartingBB->front();
+ }
+
+ // Clone the entry block, and anything recursively reachable from it.
+ std::vector<const BasicBlock*> CloneWorklist;
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
+ while (!CloneWorklist.empty()) {
+ const BasicBlock *BB = CloneWorklist.back();
+ CloneWorklist.pop_back();
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
+ }
+
+ // Loop over all of the basic blocks in the old function. If the block was
+ // reachable, we have cloned it and the old block is now in the value map:
+ // insert it into the new function in the right order. If not, ignore it.
+ //
+ // Defer PHI resolution until rest of function is resolved.
+ SmallVector<const PHINode*, 16> PHIToResolve;
+ for (const BasicBlock &BI : *OldFunc) {
+ Value *V = VMap.lookup(&BI);
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
+ if (!NewBB) continue; // Dead block.
+
+ // Add the new block to the new function.
+ NewFunc->getBasicBlockList().push_back(NewBB);
+
+ // Handle PHI nodes specially, as we have to remove references to dead
+ // blocks.
+ for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
+ // PHI nodes may have been remapped to non-PHI nodes by the caller or
+ // during the cloning process.
+ if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(VMap[PN]))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+ } else {
+ break;
+ }
+ }
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
+ }
+
+  // Resolve the PHIs now that the rest of the function has been processed;
+  // PHI resolution requires the CFG to be up-to-date.
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ const PHINode *OPN = PHIToResolve[phino];
+ unsigned NumPreds = OPN->getNumIncomingValues();
+ const BasicBlock *OldBB = OPN->getParent();
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
+
+ // Map operands for blocks that are live and remove operands for blocks
+ // that are dead.
+ for (; phino != PHIToResolve.size() &&
+ PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ OPN = PHIToResolve[phino];
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
+ for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap.lookup(PN->getIncomingBlock(pred));
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
+ Value *InVal = MapValue(PN->getIncomingValue(pred),
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ assert(InVal && "Unknown input value?");
+ PN->setIncomingValue(pred, InVal);
+ PN->setIncomingBlock(pred, MappedBlock);
+ } else {
+ PN->removeIncomingValue(pred, false);
+ --pred; // Revisit the next entry.
+ --e;
+ }
+ }
+ }
+
+ // The loop above has removed PHI entries for those blocks that are dead
+ // and has updated others. However, if a block is live (i.e. copied over)
+ // but its terminator has been changed to not go to this block, then our
+ // phi nodes will have invalid entries. Update the PHI nodes in this
+ // case.
+ PHINode *PN = cast<PHINode>(NewBB->begin());
+ NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+ if (NumPreds != PN->getNumIncomingValues()) {
+ assert(NumPreds < PN->getNumIncomingValues());
+ // Count how many times each predecessor comes to this block.
+ std::map<BasicBlock*, unsigned> PredCount;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+ PI != E; ++PI)
+ --PredCount[*PI];
+
+ // Figure out how many entries to remove from each PHI.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ ++PredCount[PN->getIncomingBlock(i)];
+
+ // At this point, the excess predecessor entries are positive in the
+ // map. Loop over all of the PHIs and remove excess predecessor
+ // entries.
+ BasicBlock::iterator I = NewBB->begin();
+ for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+ for (const auto &PCI : PredCount) {
+ BasicBlock *Pred = PCI.first;
+ for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
+ PN->removeIncomingValue(Pred, false);
+ }
+ }
+ }
+
+ // If the loops above have made these phi nodes have 0 or 1 operand,
+ // replace them with undef or the input value. We must do this for
+ // correctness, because 0-operand phis are not valid.
+ PN = cast<PHINode>(NewBB->begin());
+ if (PN->getNumIncomingValues() == 0) {
+ BasicBlock::iterator I = NewBB->begin();
+ BasicBlock::const_iterator OldI = OldBB->begin();
+ while ((PN = dyn_cast<PHINode>(I++))) {
+ Value *NV = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NV);
+ assert(VMap[&*OldI] == PN && "VMap mismatch");
+ VMap[&*OldI] = NV;
+ PN->eraseFromParent();
+ ++OldI;
+ }
+ }
+ }
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ const DataLayout &DL = NewFunc->getParent()->getDataLayout();
+ SmallSetVector<const Value *, 8> Worklist;
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
+ Worklist.insert(PHIToResolve[Idx]);
+
+  // Note that we must test the size on each iteration; the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ const Value *OrigV = Worklist[Idx];
+ auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
+ if (!I)
+ continue;
+
+    // Skip over non-intrinsic callsites; we don't want to remove any nodes
+    // from the CGSCC.
+ CallSite CS = CallSite(I);
+ if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
+ continue;
+
+ // See if this instruction simplifies.
+ Value *SimpleV = SimplifyInstruction(I, DL);
+ if (!SimpleV)
+ continue;
+
+    // Stash away all the uses of the old instruction so we can check them for
+    // recursive simplifications after a RAUW. This is cheaper than checking
+    // all uses of the replacement value on the recursive step in most cases.
+ for (const User *U : OrigV->users())
+ Worklist.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // If the original instruction had no side effects, remove it.
+ if (isInstructionTriviallyDead(I))
+ I->eraseFromParent();
+ else
+ VMap[OrigV] = I;
+ }
+
+ // Now that the inlined function body has been fully constructed, go through
+ // and zap unconditional fall-through branches. This happens all the time when
+ // specializing code: code specialization turns conditional branches into
+ // uncond branches, and this code folds them.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ Function::iterator I = Begin;
+ while (I != NewFunc->end()) {
+ // Check if this block has become dead during inlining or other
+ // simplifications. Note that the first block will appear dead, as it has
+ // not yet been wired up properly.
+ if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
+ I->getSinglePredecessor() == &*I)) {
+ BasicBlock *DeadBB = &*I++;
+ DeleteDeadBlock(DeadBB);
+ continue;
+ }
+
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ ConstantFoldTerminator(&*I);
+
+ BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+ if (!BI || BI->isConditional()) { ++I; continue; }
+
+ BasicBlock *Dest = BI->getSuccessor(0);
+ if (!Dest->getSinglePredecessor()) {
+ ++I; continue;
+ }
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+  // above should have zapped all of them.
+ assert(!isa<PHINode>(Dest->begin()));
+
+ // We know all single-entry PHI nodes in the inlined function have been
+ // removed, so we just need to splice the blocks.
+ BI->eraseFromParent();
+
+ // Make all PHI nodes that referred to Dest now refer to I as their source.
+ Dest->replaceAllUsesWith(&*I);
+
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
+ // Remove the dest block.
+ Dest->eraseFromParent();
+
+ // Do not increment I, iteratively merge all things this block branches to.
+ }
+
+ // Make a final pass over the basic blocks from the old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
+}
+
+
+/// This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ Instruction *TheCall) {
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
+}
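+
+// A sketch of how an inliner-style client might drive this (`Callee`,
+// `NewFunc`, and `ActualArgs` are hypothetical names):
+//
+//   ValueToValueMapTy VMap;
+//   auto FormalIt = Callee->arg_begin();
+//   for (Value *Actual : ActualArgs)
+//     VMap[&*FormalIt++] = Actual;  // Map formals to actuals first.
+//   SmallVector<ReturnInst *, 8> Returns;
+//   CloneAndPruneFunctionInto(NewFunc, Callee, VMap,
+//                             /*ModuleLevelChanges=*/false, Returns, ".i");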
+
+/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
+void llvm::remapInstructionsInBlocks(
+ const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
+ // Rewrite the code to refer to itself.
+ for (auto *BB : Blocks)
+ for (auto &Inst : *BB)
+ RemapInstruction(&Inst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+}
+
+/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Blocks.
+///
+/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
+/// \p LoopDomBB. Inserts the new blocks before the block specified in
+/// \p Before.
+Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
+ Loop *OrigLoop, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, LoopInfo *LI,
+ DominatorTree *DT,
+ SmallVectorImpl<BasicBlock *> &Blocks) {
+ assert(OrigLoop->getSubLoops().empty() &&
+ "Loop to be cloned cannot have inner loop");
+ Function *F = OrigLoop->getHeader()->getParent();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ Loop *NewLoop = new Loop();
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
+ assert(OrigPH && "No preheader");
+ BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
+  // Map the preheader too, so that the cloned loop-header PHIs can be
+  // remapped to the new preheader.
+ VMap[OrigPH] = NewPH;
+ Blocks.push_back(NewPH);
+
+ // Update LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, *LI);
+
+ // Update DominatorTree.
+ DT->addNewBlock(NewPH, LoopDomBB);
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
+ VMap[BB] = NewBB;
+
+ // Update LoopInfo.
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+ DT->addNewBlock(NewBB, NewPH);
+
+ Blocks.push_back(NewBB);
+ }
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
+ cast<BasicBlock>(VMap[IDomBB]));
+ }
+
+  // Move the new blocks physically from the end of the block list to their
+  // intended position before 'Before'.
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewPH);
+ F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
+ NewLoop->getHeader()->getIterator(), F->end());
+
+ return NewLoop;
+}
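+
+// Callers typically pair this with remapInstructionsInBlocks, since the
+// cloned blocks still reference the original loop's values. A sketch, where
+// `InsertBefore` and `DomBB` are hypothetical:
+//
+//   ValueToValueMapTy VMap;
+//   SmallVector<BasicBlock *, 8> NewBlocks;
+//   Loop *NewL = cloneLoopWithPreheader(InsertBefore, DomBB, L, VMap,
+//                                       ".clone", LI, DT, NewBlocks);
+//   remapInstructionsInBlocks(NewBlocks, VMap);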
+
+/// \brief Duplicate the non-PHI instructions of BB, from its beginning up to
+/// the StopAt instruction, into a split block between BB and its predecessor.
+BasicBlock *
+llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
+ Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping) {
+  // We are going to have to map operands from the original BB to the new copy
+  // of the block, 'NewBB'. If there are PHI nodes in BB, evaluate them to
+  // account for entry from PredBB.
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
+ NewBB->setName(PredBB->getName() + ".split");
+ Instruction *NewTerm = NewBB->getTerminator();
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; StopAt != &*BI; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ New->insertBefore(NewTerm);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ auto I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return NewBB;
+}
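+
+// For instance (a sketch), copying everything before the terminator of BB
+// onto the PredBB->BB edge:
+//
+//   ValueToValueMapTy Mapping;
+//   BasicBlock *Split = DuplicateInstructionsInSplitBetween(
+//       BB, PredBB, BB->getTerminator(), Mapping);
+//   // Split now ends with clones of BB's non-PHI instructions (each entered
+//   // into Mapping), followed by an unconditional branch to BB.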
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 000000000000..e5392b53050d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,201 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
+ const Comdat *SC = Src->getComdat();
+ if (!SC)
+ return;
+ Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SC->getSelectionKind());
+ Dst->setComdat(DC);
+}
+
+/// Cloning a module is not as easy as it might seem: we have to make copies of
+/// all global variables and functions, and then make their initializers and
+/// references refer to the corresponding new globals.
+///
+std::unique_ptr<Module> llvm::CloneModule(const Module *M) {
+ // Create the value map that maps things from the old module over to the new
+ // module.
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
+}
+
+std::unique_ptr<Module> llvm::CloneModule(const Module *M,
+ ValueToValueMapTy &VMap) {
+ return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
+}
+
+std::unique_ptr<Module> llvm::CloneModule(
+ const Module *M, ValueToValueMapTy &VMap,
+ function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
+ // First off, we need to create the new module.
+ std::unique_ptr<Module> New =
+ llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext());
+ New->setDataLayout(M->getDataLayout());
+ New->setTargetTriple(M->getTargetTriple());
+ New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+ // Loop over all of the global variables, making corresponding globals in the
+ // new module. Here we add them to the VMap and to the new Module. We
+ // don't worry about attributes or initializers, they will come later.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = new GlobalVariable(*New,
+ I->getValueType(),
+ I->isConstant(), I->getLinkage(),
+ (Constant*) nullptr, I->getName(),
+ (GlobalVariable*) nullptr,
+ I->getThreadLocalMode(),
+ I->getType()->getAddressSpace());
+ GV->copyAttributesFrom(&*I);
+ VMap[&*I] = GV;
+ }
+
+ // Loop over the functions in the module, making external functions as before
+ for (const Function &I : *M) {
+ Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
+ I.getLinkage(), I.getName(), New.get());
+ NF->copyAttributesFrom(&I);
+ VMap[&I] = NF;
+ }
+
+ // Loop over the aliases in the module
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ if (!ShouldCloneDefinition(&*I)) {
+ // An alias cannot act as an external reference, so we need to create
+ // either a function or a global variable depending on the value type.
+ // FIXME: Once pointee types are gone we can probably pick one or the
+ // other.
+ GlobalValue *GV;
+ if (I->getValueType()->isFunctionTy())
+ GV = Function::Create(cast<FunctionType>(I->getValueType()),
+ GlobalValue::ExternalLinkage, I->getName(),
+ New.get());
+ else
+ GV = new GlobalVariable(
+ *New, I->getValueType(), false, GlobalValue::ExternalLinkage,
+ nullptr, I->getName(), nullptr,
+ I->getThreadLocalMode(), I->getType()->getAddressSpace());
+ VMap[&*I] = GV;
+ // We do not copy attributes (mainly because copying between different
+ // kinds of globals is forbidden), but this is generally not required for
+ // correctness.
+ continue;
+ }
+ auto *GA = GlobalAlias::create(I->getValueType(),
+ I->getType()->getPointerAddressSpace(),
+ I->getLinkage(), I->getName(), New.get());
+ GA->copyAttributesFrom(&*I);
+ VMap[&*I] = GA;
+ }
+
+  // Now that everything a global variable's initializer can refer to has been
+  // created, loop through and copy the initializers over. We also set the
+  // attributes on each global now.
+ //
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ if (I->isDeclaration())
+ continue;
+
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+ if (!ShouldCloneDefinition(&*I)) {
+ // Skip after setting the correct linkage for an external reference.
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ continue;
+ }
+ if (I->hasInitializer())
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I->getAllMetadata(MDs);
+ for (auto MD : MDs)
+ GV->addMetadata(MD.first,
+ *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+
+ copyComdat(GV, &*I);
+ }
+
+ // Similarly, copy over function bodies now...
+ //
+ for (const Function &I : *M) {
+ if (I.isDeclaration())
+ continue;
+
+ Function *F = cast<Function>(VMap[&I]);
+ if (!ShouldCloneDefinition(&I)) {
+ // Skip after setting the correct linkage for an external reference.
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ // Personality function is not valid on a declaration.
+ F->setPersonalityFn(nullptr);
+ continue;
+ }
+
+ Function::arg_iterator DestI = F->arg_begin();
+ for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[&*J] = &*DestI++;
+ }
+
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns);
+
+ if (I.hasPersonalityFn())
+ F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
+
+ copyComdat(F, &I);
+ }
+
+ // And aliases
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I) {
+ // We already dealt with undefined aliases above.
+ if (!ShouldCloneDefinition(&*I))
+ continue;
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]);
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
+ }
+
+ // And named metadata....
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+ }
+
+ return New;
+}
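+
+// For example (a sketch), cloning a module while demoting one definition to
+// an external declaration:
+//
+//   ValueToValueMapTy VMap;
+//   std::unique_ptr<Module> NewM =
+//       CloneModule(M, VMap, [](const GlobalValue *GV) {
+//         return GV->getName() != "main"; // Keep "main" as a declaration.
+//       });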
+
+extern "C" {
+
+LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
+ return wrap(CloneModule(unwrap(M)).release());
+}
+
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..d9294c499309
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -0,0 +1,108 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines that help analyze compare instructions and fold
+// them into constants or other compare instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+ ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
+ : ICI->getPredicate();
+ switch (Pred) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+}
+
+Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred) {
+ switch (Code) {
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
+ case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
+ case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return nullptr;
+}
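+
+// These two helpers compose. To fold (icmp ult X, Y) | (icmp eq X, Y) into a
+// single compare, OR the predicate codes and rebuild (a sketch; LHS and RHS
+// are the two ICmpInsts):
+//
+//   unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); // 100 | 010 = 110
+//   CmpInst::Predicate NewPred;
+//   if (!getICmpValue(/*Sign=*/false, Code, X, Y, NewPred))
+//     ; // Code 6 is not a constant result, so NewPred is ICMP_ULE and the
+//       // caller can emit `icmp ule X, Y`.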
+
+bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!C)
+ return false;
+
+ switch (I->getPredicate()) {
+ default:
+ return false;
+ case ICmpInst::ICMP_SLT:
+ // X < 0 is equivalent to (X & SignMask) != 0.
+ if (!C->isZero())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // X > -1 is equivalent to (X & SignMask) == 0.
+ if (!C->isMinusOne())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
+ if (!C->getValue().isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), -C->getValue());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
+ if (!(C->getValue() + 1).isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), ~C->getValue());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+
+ X = I->getOperand(0);
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+}
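+
+// As a concrete instance: `icmp ult %x, 16` decomposes into the mask test
+// (%x & -16) == 0, i.e. Pred = ICMP_EQ, X = %x, Y = -16 (~15), Z = 0. A
+// sketch of a caller:
+//
+//   CmpInst::Predicate Pred;
+//   Value *X, *Y, *Z;
+//   if (decomposeBitTestICmp(ICI, Pred, X, Y, Z)) {
+//     // Rebuild as: icmp Pred (and X, Y), Z
+//   }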
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 000000000000..1189714dfab1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,1122 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "code-extractor"
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+ cl::desc("Aggregate arguments to code-extracted functions"));
+
+/// \brief Test whether a block is valid for extraction.
+bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
+ // Landing pads must be in the function where they were inserted for cleanup.
+ if (BB.isEHPad())
+ return false;
+  // Taking the address of a basic block moved to another function is illegal.
+ if (BB.hasAddressTaken())
+ return false;
+
+  // Don't hoist code that uses another basic block's address, as it's likely
+  // to lead to unexpected behavior, like cross-function jumps.
+ SmallPtrSet<User const *, 16> Visited;
+ SmallVector<User const *, 16> ToVisit;
+
+ for (Instruction const &Inst : BB)
+ ToVisit.push_back(&Inst);
+
+ while (!ToVisit.empty()) {
+ User const *Curr = ToVisit.pop_back_val();
+ if (!Visited.insert(Curr).second)
+ continue;
+ if (isa<BlockAddress const>(Curr))
+      return false; // Even a self-reference is unlikely to be compatible.
+
+ if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
+ continue;
+
+ for (auto const &U : Curr->operands()) {
+ if (auto *UU = dyn_cast<User>(U))
+ ToVisit.push_back(UU);
+ }
+ }
+
+ // Don't hoist code containing allocas, invokes, or vastarts.
+ for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ if (isa<AllocaInst>(I) || isa<InvokeInst>(I))
+ return false;
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Build a set of blocks to extract if the input blocks are viable.
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+ assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
+ SetVector<BasicBlock *> Result;
+
+ // Loop over the blocks, adding them to our set-vector, and aborting with an
+ // empty set if we encounter invalid blocks.
+ for (BasicBlock *BB : BBs) {
+
+ // If this block is dead, don't process it.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!Result.insert(BB))
+ llvm_unreachable("Repeated basic blocks in extraction input");
+ if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
+ Result.clear();
+ return Result;
+ }
+ }
+
+#ifndef NDEBUG
+ for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
+ E = Result.end();
+ I != E; ++I)
+ for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I);
+ PI != PE; ++PI)
+ assert(Result.count(*PI) &&
+ "No blocks in this region may have entries from outside the region"
+ " except for the first block!");
+#endif
+
+ return Result;
+}
+
+CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
+ bool AggregateArgs, BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}
+
+CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
+ NumExitBlocks(~0U) {}
+
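+// Typical driver code for the extractor (a sketch, assuming `DT` is an
+// up-to-date DominatorTree and `L` a loop to outline):
+//
+//   CodeExtractor CE(DT, *L);
+//   if (Function *Outlined = CE.extractCodeRegion()) {
+//     // The region has been replaced by a call to Outlined.
+//   }
+//   // extractCodeRegion() returns nullptr when the blocks are not
+//   // extractable, e.g. they contain an EH pad or an address-taken block.
+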
+/// definedInRegion - Return true if the specified value is defined in the
+/// extracted region.
+static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+/// definedInCaller - Return true if the specified value is defined in the
+/// function being code extracted, but not in the region being extracted.
+/// These values must be passed in as live-ins to the function.
+static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
+ if (isa<Argument>(V)) return true;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (!Blocks.count(I->getParent()))
+ return true;
+ return false;
+}
+
+static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
+ BasicBlock *CommonExitBlock = nullptr;
+ auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
+ for (auto *Succ : successors(Block)) {
+ // Internal edges, ok.
+ if (Blocks.count(Succ))
+ continue;
+ if (!CommonExitBlock) {
+ CommonExitBlock = Succ;
+ continue;
+ }
+ if (CommonExitBlock == Succ)
+ continue;
+
+ return true;
+ }
+ return false;
+ };
+
+ if (any_of(Blocks, hasNonCommonExitSucc))
+ return nullptr;
+
+ return CommonExitBlock;
+}
+
+bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
+ Instruction *Addr) const {
+ AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ for (Instruction &II : BB) {
+
+ if (isa<DbgInfoIntrinsic>(II))
+ continue;
+
+ unsigned Opcode = II.getOpcode();
+ Value *MemAddr = nullptr;
+ switch (Opcode) {
+ case Instruction::Store:
+ case Instruction::Load: {
+ if (Opcode == Instruction::Store) {
+ StoreInst *SI = cast<StoreInst>(&II);
+ MemAddr = SI->getPointerOperand();
+ } else {
+ LoadInst *LI = cast<LoadInst>(&II);
+ MemAddr = LI->getPointerOperand();
+ }
+        // A global variable cannot alias a local, so it cannot conflict.
+        if (isa<Constant>(MemAddr))
+          break;
+        Value *Base = MemAddr->stripInBoundsConstantOffsets();
+        if (!isa<AllocaInst>(Base) || Base == AI)
+ return false;
+ break;
+ }
+ default: {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ break;
+ return false;
+ }
+        // Treat any other instruction conservatively; bail if it may have
+        // side effects.
+ if (II.mayHaveSideEffects())
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+BasicBlock *
+CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
+ BasicBlock *SinglePredFromOutlineRegion = nullptr;
+ assert(!Blocks.count(CommonExitBlock) &&
+ "Expect a block outside the region!");
+ for (auto *Pred : predecessors(CommonExitBlock)) {
+ if (!Blocks.count(Pred))
+ continue;
+ if (!SinglePredFromOutlineRegion) {
+ SinglePredFromOutlineRegion = Pred;
+ } else if (SinglePredFromOutlineRegion != Pred) {
+ SinglePredFromOutlineRegion = nullptr;
+ break;
+ }
+ }
+
+ if (SinglePredFromOutlineRegion)
+ return SinglePredFromOutlineRegion;
+
+#ifndef NDEBUG
+  auto getFirstPHI = [](BasicBlock *BB) -> PHINode * {
+    // A PHI node, if present, must be the first instruction of its block.
+    return dyn_cast<PHINode>(&BB->front());
+  };
+  // If there are any PHI nodes, the single pred either exists or has already
+  // been created before code extraction.
+ assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
+#endif
+
+ BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
+ CommonExitBlock->getFirstNonPHI()->getIterator());
+
+ for (auto *Pred : predecessors(CommonExitBlock)) {
+ if (Blocks.count(Pred))
+ continue;
+ Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
+ }
+ // Now add the old exit block to the outline region.
+ Blocks.insert(CommonExitBlock);
+ return CommonExitBlock;
+}
+
+void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+ BasicBlock *&ExitBlock) const {
+ Function *Func = (*Blocks.begin())->getParent();
+ ExitBlock = getCommonExitBlock(Blocks);
+
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ for (Instruction &II : BB) {
+ auto *AI = dyn_cast<AllocaInst>(&II);
+ if (!AI)
+ continue;
+
+      // Find the pair of lifetime markers for address 'Addr' that are either
+      // defined inside the outline region or can legally be shrinkwrapped into
+      // the outline region. If there are no other untracked uses of the
+      // address, return the pair of markers if found; otherwise return a pair
+      // of nullptrs.
+ auto GetLifeTimeMarkers =
+ [&](Instruction *Addr, bool &SinkLifeStart,
+ bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
+ Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+
+ for (User *U : Addr->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // Do not handle the case where AI has multiple start markers.
+ if (LifeStart)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+ LifeStart = IntrInst;
+ }
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (LifeEnd)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+ LifeEnd = IntrInst;
+ }
+ continue;
+ }
+          // Found an untracked use of the address; bail.
+ if (!definedInRegion(Blocks, U))
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+ }
+
+ if (!LifeStart || !LifeEnd)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+ SinkLifeStart = !definedInRegion(Blocks, LifeStart);
+ HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
+        // Do the legality check.
+ if ((SinkLifeStart || HoistLifeEnd) &&
+ !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+        // Check to see if we have a place to do hoisting; if not, bail.
+ if (HoistLifeEnd && !ExitBlock)
+ return std::make_pair<Instruction *>(nullptr, nullptr);
+
+ return std::make_pair(LifeStart, LifeEnd);
+ };
+
+ bool SinkLifeStart = false, HoistLifeEnd = false;
+ auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
+
+ if (Markers.first) {
+ if (SinkLifeStart)
+ SinkCands.insert(Markers.first);
+ SinkCands.insert(AI);
+ if (HoistLifeEnd)
+ HoistCands.insert(Markers.second);
+ continue;
+ }
+
+ // Follow the bitcast.
+ Instruction *MarkerAddr = nullptr;
+ for (User *U : AI->users()) {
+
+ if (U->stripInBoundsConstantOffsets() == AI) {
+ SinkLifeStart = false;
+ HoistLifeEnd = false;
+ Instruction *Bitcast = cast<Instruction>(U);
+ Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
+ if (Markers.first) {
+ MarkerAddr = Bitcast;
+ continue;
+ }
+ }
+
+ // Found unknown use of AI.
+ if (!definedInRegion(Blocks, U)) {
+ MarkerAddr = nullptr;
+ break;
+ }
+ }
+
+ if (MarkerAddr) {
+ if (SinkLifeStart)
+ SinkCands.insert(Markers.first);
+ if (!definedInRegion(Blocks, MarkerAddr))
+ SinkCands.insert(MarkerAddr);
+ SinkCands.insert(AI);
+ if (HoistLifeEnd)
+ HoistCands.insert(Markers.second);
+ }
+ }
+ }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &SinkCands) const {
+
+ for (BasicBlock *BB : Blocks) {
+ // If a used value is defined outside the region, it's an input. If an
+ // instruction is used outside the region, it's an output.
+ for (Instruction &II : *BB) {
+ for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
+ ++OI) {
+ Value *V = *OI;
+ if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ Inputs.insert(V);
+ }
+
+ for (User *U : II.users())
+ if (!definedInRegion(Blocks, U)) {
+ Outputs.insert(&II);
+ break;
+ }
+ }
+ }
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+ unsigned NumPredsFromRegion = 0;
+ unsigned NumPredsOutsideRegion = 0;
+
+ if (Header != &Header->getParent()->getEntryBlock()) {
+ PHINode *PN = dyn_cast<PHINode>(Header->begin());
+ if (!PN) return; // No PHI nodes.
+
+ // If the header node contains any PHI nodes, check to see if there is more
+ // than one entry from outside the region. If so, we need to sever the
+ // header block into two.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i)))
+ ++NumPredsFromRegion;
+ else
+ ++NumPredsOutsideRegion;
+
+ // If there is one (or fewer) predecessor from outside the region, we don't
+ // need to do anything special.
+ if (NumPredsOutsideRegion <= 1) return;
+ }
+
+ // Otherwise, we need to split the header block into two pieces: one
+ // containing PHI nodes merging values from outside of the region, and a
+ // second that contains all of the code for the block and merges back any
+ // incoming values from inside of the region.
+ BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
+
+ // We only want to code extract the second block now, and it becomes the new
+ // header of the region.
+ BasicBlock *OldPred = Header;
+ Blocks.remove(OldPred);
+ Blocks.insert(NewBB);
+ Header = NewBB;
+
+ // Okay, now we need to adjust the PHI nodes and any branches from within the
+ // region to go to the new header block instead of the old header block.
+ if (NumPredsFromRegion) {
+ PHINode *PN = cast<PHINode>(OldPred->begin());
+ // Loop over all of the predecessors of OldPred that are in the region,
+ // changing them to branch to NewBB instead.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ TI->replaceUsesOfWith(OldPred, NewBB);
+ }
+
+    // Okay, everything within the region is now branching to the right block;
+    // we just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ BasicBlock::iterator AfterPHIs;
+ for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+ PHINode *PN = cast<PHINode>(AfterPHIs);
+      // Create a new PHI node in the new region, whose incoming value from
+      // OldPred is PN itself.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+ PN->getName() + ".ce", &NewBB->front());
+ PN->replaceAllUsesWith(NewPN);
+ NewPN->addIncoming(PN, OldPred);
+
+      // Loop over all of the incoming values in PN, moving them to NewPN if they
+ // are from the extracted region.
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+ PN->removeIncomingValue(i);
+ --i;
+ }
+ }
+ }
+ }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+ for (BasicBlock *Block : Blocks)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
+ BasicBlock *New =
+ Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
+ if (DT) {
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
+ DomTreeNode *OldNode = DT->getNode(Block);
+ SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
+ OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(New, Block);
+
+ for (DomTreeNode *I : Children)
+ DT->changeImmediateDominator(I, NewNode);
+ }
+ }
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const ValueSet &inputs,
+ const ValueSet &outputs,
+ BasicBlock *header,
+ BasicBlock *newRootNode,
+ BasicBlock *newHeader,
+ Function *oldFunction,
+ Module *M) {
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
+
+  // The extracted function returns a value identifying which exit block was
+  // taken (or void if there is at most one exit); outputs go back to the
+  // caller by reference.
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: RetTy = Type::getVoidTy(header->getContext()); break;
+ case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
+ default: RetTy = Type::getInt16Ty(header->getContext()); break;
+ }
+
+ std::vector<Type*> paramTy;
+
+ // Add the types of the input values to the function's argument list
+ for (Value *value : inputs) {
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
+ paramTy.push_back(value->getType());
+ }
+
+ // Add the types of the output values to the function's argument list.
+ for (Value *output : outputs) {
+ DEBUG(dbgs() << "instr used in func: " << *output << "\n");
+ if (AggregateArgs)
+ paramTy.push_back(output->getType());
+ else
+ paramTy.push_back(PointerType::getUnqual(output->getType()));
+ }
+
+ DEBUG({
+ dbgs() << "Function type: " << *RetTy << " f(";
+ for (Type *i : paramTy)
+ dbgs() << *i << ", ";
+ dbgs() << ")\n";
+ });
+
+ StructType *StructTy;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ StructTy = StructType::get(M->getContext(), paramTy);
+ paramTy.clear();
+ paramTy.push_back(PointerType::getUnqual(StructTy));
+ }
+ FunctionType *funcType =
+ FunctionType::get(RetTy, paramTy, false);
+
+ // Create the new function
+ Function *newFunction = Function::Create(funcType,
+ GlobalValue::InternalLinkage,
+ oldFunction->getName() + "_" +
+ header->getName(), M);
+ // If the old function is no-throw, so is the new one.
+ if (oldFunction->doesNotThrow())
+ newFunction->setDoesNotThrow();
+
+ // Inherit the uwtable attribute if we need to.
+ if (oldFunction->hasUWTable())
+ newFunction->setHasUWTable();
+
+ // Inherit all of the target-dependent attributes. (E.g. if the extracted
+ // region contains a call to an x86.sse instruction, we need to make sure
+ // that the extracted region has the "target-features" attribute allowing
+ // it to be lowered.)
+ // FIXME: This should be changed to check whether a specific attribute
+ // cannot be inherited.
+ AttrBuilder AB(oldFunction->getAttributes().getFnAttributes());
+ for (const auto &Attr : AB.td_attrs())
+ newFunction->addFnAttr(Attr.first, Attr.second);
+
+ newFunction->getBasicBlockList().push_back(newRootNode);
+
+ // Create an iterator to name all of the arguments we inserted.
+ Function::arg_iterator AI = newFunction->arg_begin();
+
+ // Rewrite all users of the inputs in the extracted region to use the
+ // arguments (or appropriate addressing into struct) instead.
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *RewriteVal;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+ TerminatorInst *TI = newFunction->begin()->getTerminator();
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ } else
+ RewriteVal = &*AI++;
+
+ std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
+ for (User *use : Users)
+ if (Instruction *inst = dyn_cast<Instruction>(use))
+ if (Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(inputs[i], RewriteVal);
+ }
+
+ // Set names for input and output arguments.
+ if (!AggregateArgs) {
+ AI = newFunction->arg_begin();
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+ AI->setName(inputs[i]->getName());
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+ AI->setName(outputs[i]->getName()+".out");
+ }
+
+ // Rewrite branches to basic blocks outside of the region to new dummy blocks
+ // within the new function. This must be done before we lose track of which
+ // blocks were originally in the code region.
+ std::vector<User*> Users(header->user_begin(), header->user_end());
+ for (unsigned i = 0, e = Users.size(); i != e; ++i)
+ // If the BasicBlock containing the branch is not in the region,
+ // modify the branch target to point at the new header.
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+ if (!Blocks.count(TI->getParent()) &&
+ TI->getParent()->getParent() == oldFunction)
+ TI->replaceUsesOfWith(header, newHeader);
+
+ return newFunction;
+}
+
+/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
+/// that uses the value within the basic block, and return the predecessor
+/// block associated with that use, or return null if none is found.
+static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
+ for (Use &U : Used->uses()) {
+ PHINode *P = dyn_cast<PHINode>(U.getUser());
+ if (P && P->getParent() == BB)
+ return P->getIncomingBlock(U);
+ }
+
+ return nullptr;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
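+///
+/// As a hedged sketch (illustrative names only), a region with two exit
+/// blocks leaves the caller looking roughly like:
+///   %targetBlock = call i1 @foo_bb(...)
+///   br i1 %targetBlock, label %exit0, label %exit1
+/// With three or more exits, the call returns an i16 and the caller keeps a
+/// switch on that value instead.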
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+ ValueSet &inputs, ValueSet &outputs) {
+ // Emit a call to the new function, passing in either a pointer to a struct
+ // (if aggregating parameters) or the plain inputs plus allocated memory for
+ // the outputs.
+ std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+
+ Module *M = newFunction->getParent();
+ LLVMContext &Context = M->getContext();
+ const DataLayout &DL = M->getDataLayout();
+
+ // Add inputs as params, or to be filled into the struct
+ for (Value *input : inputs)
+ if (AggregateArgs)
+ StructValues.push_back(input);
+ else
+ params.push_back(input);
+
+ // Create allocas for the outputs
+ for (Value *output : outputs) {
+ if (AggregateArgs) {
+ StructValues.push_back(output);
+ } else {
+ AllocaInst *alloca =
+ new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
+ nullptr, output->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
+ ReloadOutputs.push_back(alloca);
+ params.push_back(alloca);
+ }
+ }
+
+ StructType *StructArgTy = nullptr;
+ AllocaInst *Struct = nullptr;
+ if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+ std::vector<Type*> ArgTypes;
+ for (ValueSet::iterator v = StructValues.begin(),
+ ve = StructValues.end(); v != ve; ++v)
+ ArgTypes.push_back((*v)->getType());
+
+ // Allocate a struct at the beginning of this function
+ StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
+ "structArg",
+ &codeReplacer->getParent()->front().front());
+ params.push_back(Struct);
+
+ for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ StoreInst *SI = new StoreInst(StructValues[i], GEP);
+ codeReplacer->getInstList().push_back(SI);
+ }
+ }
+
+ // Emit the call to the function
+ CallInst *call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
+ codeReplacer->getInstList().push_back(call);
+
+ Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+ unsigned FirstOut = inputs.size();
+ if (!AggregateArgs)
+ std::advance(OutputArgBegin, inputs.size());
+
+ // Reload the outputs passed in by reference
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ Value *Output = nullptr;
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
+ codeReplacer->getInstList().push_back(GEP);
+ Output = GEP;
+ } else {
+ Output = ReloadOutputs[i];
+ }
+ LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ Reloads.push_back(load);
+ codeReplacer->getInstList().push_back(load);
+ std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
+ for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+ Instruction *inst = cast<Instruction>(Users[u]);
+ if (!Blocks.count(inst->getParent()))
+ inst->replaceUsesOfWith(outputs[i], load);
+ }
+ }
+
+ // Now we can emit a switch statement using the call as a value.
+ SwitchInst *TheSwitch =
+ SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
+ codeReplacer, 0, codeReplacer);
+
+ // Since there may be multiple exits from the original region, make the new
+ // function return an unsigned value and switch on that number here. This
+ // loop iterates over all of the blocks in the extracted region, updating
+ // any terminator instructions that branch to blocks outside the region.
+ std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+ unsigned switchVal = 0;
+ for (BasicBlock *Block : Blocks) {
+ TerminatorInst *TI = Block->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (!Blocks.count(TI->getSuccessor(i))) {
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // Add a new basic block which returns the appropriate value.
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (!NewTarget) {
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = nullptr;
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+
+ // Restore values just before we exit
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+ // For an invoke, the normal destination is the only one that is
+ // dominated by the result of the invocation
+ BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+ bool DominatesDef = true;
+
+ BasicBlock *NormalDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
+ NormalDest = Invoke->getNormalDest();
+
+ if (NormalDest) {
+ DefBlock = NormalDest;
+
+ // Make sure we are looking at the original successor block, not
+ // at a newly inserted exit block, which won't be in the dominator
+ // info.
+ for (const auto &I : ExitBlockMap)
+ if (DefBlock == I.second) {
+ DefBlock = I.first;
+ break;
+ }
+
+ // In the extract block case, if the block we are extracting ends
+ // with an invoke instruction, make sure that we don't emit a
+ // store of the invoke value for the unwind block.
+ if (!DT && DefBlock != OldTarget)
+ DominatesDef = false;
+ }
+
+ if (DT) {
+ DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+ // If the output value is used by a phi in the target block,
+ // then we need to test for dominance of the phi's predecessor
+ // instead. Unfortunately, this is a little complicated since we
+ // have already rewritten uses of the value to uses of the reload.
+ BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],
+ OldTarget);
+ if (pred && DT && DT->dominates(DefBlock, pred))
+ DominatesDef = true;
+ }
+
+ if (DominatesDef) {
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
+ FirstOut+out);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(),
+ NTRet);
+ new StoreInst(outputs[out], GEP, NTRet);
+ } else {
+ new StoreInst(outputs[out], &*OAI, NTRet);
+ }
+ }
+ // Advance output iterator even if we don't emit a store
+ if (!AggregateArgs) ++OAI;
+ }
+ }
+
+ // Rewrite the original branch instruction with this new target.
+ TI->setSuccessor(i, NewTarget);
+ }
+ }
+
+ // Now that we've done the deed, simplify the switch instruction.
+ Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+ switch (NumExitBlocks) {
+ case 0:
+ // There are no successors (other than the block containing the switch
+ // itself), which means that previously this was the last part of the
+ // function; hence it should be rewritten as a `ret'.
+
+ // Check if the function should return a value
+ if (OldFnRetTy->isVoidTy()) {
+ ReturnInst::Create(Context, nullptr, TheSwitch); // Return void
+ } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+ // return what we have
+ ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
+ } else {
+ // Otherwise we must have extracted code such as an unwind path; just
+ // return a null value of the expected type.
+ ReturnInst::Create(Context,
+ Constant::getNullValue(OldFnRetTy), TheSwitch);
+ }
+
+ TheSwitch->eraseFromParent();
+ break;
+ case 1:
+ // Only a single destination, change the switch into an unconditional
+ // branch.
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ case 2:
+ BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+ call, TheSwitch);
+ TheSwitch->eraseFromParent();
+ break;
+ default:
+ // Otherwise, make the default destination of the switch instruction be one
+ // of the other successors.
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove redundant case
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
+ break;
+ }
+}
+
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+ Function *oldFunc = (*Blocks.begin())->getParent();
+ Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+ for (BasicBlock *Block : Blocks) {
+ // Delete the basic block from the old function, and the list of blocks
+ oldBlocks.remove(Block);
+
+ // Insert this basic block into the new function
+ newBlocks.push_back(Block);
+ }
+}
+
+void CodeExtractor::calculateNewCallTerminatorWeights(
+ BasicBlock *CodeReplacer,
+ DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
+ BranchProbabilityInfo *BPI) {
+ typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+ typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+
+ // Update the branch weights for the exit block.
+ TerminatorInst *TI = CodeReplacer->getTerminator();
+ SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
+
+ // Block Frequency distribution with dummy node.
+ Distribution BranchDist;
+
+ // Add each of the frequencies of the successors.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+ BlockNode ExitNode(i);
+ uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
+ if (ExitFreq != 0)
+ BranchDist.addExit(ExitNode, ExitFreq);
+ else
+ BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero());
+ }
+
+ // Check for no total weight.
+ if (BranchDist.Total == 0)
+ return;
+
+ // Normalize the distribution so that the weights fit in unsigned.
+ BranchDist.normalize();
+
+ // Create normalized branch weights and set the metadata.
+ for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
+ const auto &Weight = BranchDist.Weights[I];
+
+ // Get the weight and update the current BFI.
+ BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
+ BranchProbability BP(Weight.Amount, BranchDist.Total);
+ BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP);
+ }
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
+}
+
+Function *CodeExtractor::extractCodeRegion() {
+ if (!isEligible())
+ return nullptr;
+
+ ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
+
+ // Assumption: this is a single-entry code region, and the header is the first
+ // block in the region.
+ BasicBlock *header = *Blocks.begin();
+
+ // Calculate the entry frequency of the new function before we change the root
+ // block.
+ BlockFrequency EntryFreq;
+ if (BFI) {
+ assert(BPI && "Both BPI and BFI are required to preserve profile info");
+ for (BasicBlock *Pred : predecessors(header)) {
+ if (Blocks.count(Pred))
+ continue;
+ EntryFreq +=
+ BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
+ }
+ }
+
+ // If we have to split PHI nodes or the entry block, do so now.
+ severSplitPHINodes(header);
+
+ // If we have any return instructions in the region, split those blocks so
+ // that the return is not in the region.
+ splitReturnBlocks();
+
+ Function *oldFunction = header->getParent();
+
+ // This block takes the place of the original region.
+ BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+ "codeRepl", oldFunction,
+ header);
+
+ // The new function needs a root node because other nodes can branch to the
+ // head of the region, but the entry node of a function cannot have preds.
+ BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+ "newFuncRoot");
+ newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+
+ findAllocas(SinkingCands, HoistingCands, CommonExit);
+ assert(HoistingCands.empty() || CommonExit);
+
+ // Find inputs to, outputs from the code region.
+ findInputsOutputs(inputs, outputs, SinkingCands);
+
+ // Now sink all instructions which only have non-phi uses inside the region
+ for (auto *II : SinkingCands)
+ cast<Instruction>(II)->moveBefore(*newFuncRoot,
+ newFuncRoot->getFirstInsertionPt());
+
+ if (!HoistingCands.empty()) {
+ auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
+ Instruction *TI = HoistToBlock->getTerminator();
+ for (auto *II : HoistingCands)
+ cast<Instruction>(II)->moveBefore(TI);
+ }
+
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (BasicBlock *Block : Blocks) {
+ for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
+ ++SI) {
+ if (!Blocks.count(*SI)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[*SI];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ }
+ ExitBlocks.insert(*SI);
+ }
+ }
+ }
+ NumExitBlocks = ExitBlocks.size();
+
+ // Construct new function based on inputs/outputs & add allocas for all defs.
+ Function *newFunction = constructFunction(inputs, outputs, header,
+ newFuncRoot,
+ codeReplacer, oldFunction,
+ oldFunction->getParent());
+
+ // Update the entry count of the function.
+ if (BFI) {
+ Optional<uint64_t> EntryCount =
+ BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ if (EntryCount.hasValue())
+ newFunction->setEntryCount(EntryCount.getValue());
+ BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
+ }
+
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+ moveCodeToFunction(newFunction);
+
+ // Update the branch weights for the exit block.
+ if (BFI && NumExitBlocks > 1)
+ calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
+
+ // Loop over all of the PHI nodes in the header block, and change any
+ // references to the old incoming edge to be the new incoming edge.
+ for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!Blocks.count(PN->getIncomingBlock(i)))
+ PN->setIncomingBlock(i, newFuncRoot);
+ }
+
+ // Look at all successors of the codeReplacer block. If any of these blocks
+ // had PHI nodes in them, we need to update the "from" block to be the code
+ // replacer, not the original block in the extracted region.
+ std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+ succ_end(codeReplacer));
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ std::set<BasicBlock*> ProcessedPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (Blocks.count(PN->getIncomingBlock(i))) {
+ if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+ PN->setIncomingBlock(i, codeReplacer);
+ else {
+ // There were multiple entries in the PHI for this block, now there
+ // is only one, so remove the duplicated entries.
+ PN->removeIncomingValue(i, false);
+ --i; --e;
+ }
+ }
+ }
+
+ DEBUG(if (verifyFunction(*newFunction))
+ report_fatal_error("verifyFunction failed!"));
+ return newFunction;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
new file mode 100644
index 000000000000..6642a97a29c2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -0,0 +1,165 @@
+//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that are used to process llvm.global_ctors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "ctor_utils"
+
+namespace llvm {
+
+namespace {
+/// Given a specified llvm.global_ctors list, remove the elements whose
+/// indices are set in CtorsToRemove.
+void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+ // Filter out the initializer elements to remove.
+ ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
+ SmallVector<Constant *, 10> CAList;
+ for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
+ if (!CtorsToRemove.test(I))
+ CAList.push_back(OldCA->getOperand(I));
+
+ // Create the new array initializer.
+ ArrayType *ATy =
+ ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
+ Constant *CA = ConstantArray::get(ATy, CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == OldCA->getType()) {
+ GCL->setInitializer(CA);
+ return;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV =
+ new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
+ CA, "", GCL->getThreadLocalMode());
+ GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+}
+
+/// Given an llvm.global_ctors list that we can understand, return the
+/// functions it names (with null entries preserved) as a vector.
+std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function *>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function *> Result;
+ Result.reserve(CA->getNumOperands());
+ for (auto &V : CA->operands()) {
+ ConstantStruct *CS = cast<ConstantStruct>(V);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// Find the llvm.global_ctors list, verifying that all initializers have an
+/// init priority of 65535.
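+/// The expected shape is the usual appending-linkage array of
+/// { i32, void ()*, i8* } entries (an illustrative example, not code from
+/// this file):
+///   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
+///     [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null }]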
+GlobalVariable *findGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return nullptr;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer())
+ return nullptr;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (auto &V : CA->operands()) {
+ if (isa<ConstantAggregateZero>(V))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(V);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return nullptr;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return nullptr;
+ }
+
+ return GV;
+}
+} // namespace
+
+/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
+/// entries for which it returns true. Return true if anything changed.
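+/// (For orientation only: a typical caller passes a callback that returns
+/// true when the ctor's body could be fully evaluated at compile time, as
+/// GlobalOpt does; that usage lives outside this file.)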
+bool optimizeGlobalCtorsList(Module &M,
+ function_ref<bool(Function *)> ShouldRemove) {
+ GlobalVariable *GlobalCtors = findGlobalCtors(M);
+ if (!GlobalCtors)
+ return false;
+
+ std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
+ if (Ctors.empty())
+ return false;
+
+ bool MadeChange = false;
+
+ // Loop over global ctors, optimizing them when we can.
+ unsigned NumCtors = Ctors.size();
+ BitVector CtorsToRemove(NumCtors);
+ for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
+ Function *F = Ctors[i];
+ // Null entries carry no constructor to optimize; skip them.
+ if (!F)
+ continue;
+
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+
+ // We cannot simplify external ctor functions.
+ if (F->empty())
+ continue;
+
+ // If the callback says we can remove this ctor (e.g. because it can be
+ // evaluated at compile time), drop it from the list.
+ if (ShouldRemove(F)) {
+ Ctors[i] = nullptr;
+ CtorsToRemove.set(i);
+ NumCtors--;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ removeGlobalCtors(GlobalCtors, CtorsToRemove);
+ return true;
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 000000000000..6d3d287defdb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,151 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca. This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value. It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
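+///
+/// Roughly (illustrative IR, mirroring the code below), demoting
+///   %v = add i32 %a, %b
+/// produces
+///   %v.reg2mem = alloca i32
+///   ...
+///   %v = add i32 %a, %b
+///   store i32 %v, i32* %v.reg2mem
+/// and each former use of %v becomes a ".reload" load from %v.reg2mem.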
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+ Instruction *AllocaPoint) {
+ if (I.use_empty()) {
+ I.eraseFromParent();
+ return nullptr;
+ }
+
+ Function *F = I.getParent()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName()+".reg2mem", AllocaPoint);
+ } else {
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".reg2mem", &F->getEntryBlock().front());
+ }
+
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and create a basic block
+ // into which the store can be inserted.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ if (!II->getNormalDest()->getSinglePredecessor()) {
+ unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
+ assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ (void)BB;
+ }
+ }
+
+ // Change all of the users of the instruction to read from the stack slot.
+ while (!I.use_empty()) {
+ Instruction *U = cast<Instruction>(I.user_back());
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If this is a PHI node, we can't insert a load of the value before the
+ // use. Instead insert the load in the predecessor block corresponding
+ // to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this PHI
+ // node, we cannot insert multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &I) {
+ Value *&V = Loads[PN->getIncomingBlock(i)];
+ if (!V) {
+ // Insert the load into the predecessor block
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ PN->getIncomingBlock(i)->getTerminator());
+ }
+ PN->setIncomingValue(i, V);
+ }
+
+ } else {
+ // If this is a normal instruction, just insert a load.
+ Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ U->replaceUsesOfWith(&I, V);
+ }
+ }
+
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
+ BasicBlock::iterator InsertPt;
+ if (!isa<TerminatorInst>(I)) {
+ InsertPt = ++I.getIterator();
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+ } else {
+ InvokeInst &II = cast<InvokeInst>(I);
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ }
+
+ new StoreInst(&I, Slot, &*InsertPt);
+ return Slot;
+}
+
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the pointer to the alloca inserted.
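+///
+/// Sketch (illustrative IR, not from this file): a PHI such as
+///   %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+/// becomes stores of %a and %b at the ends of %bb1 and %bb2 respectively,
+/// plus a load from the new slot where the PHI used to be.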
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+ if (P->use_empty()) {
+ P->eraseFromParent();
+ return nullptr;
+ }
+
+ const DataLayout &DL = P->getModule()->getDataLayout();
+
+ // Create a stack slot to hold the value.
+ AllocaInst *Slot;
+ if (AllocaPoint) {
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName()+".reg2mem", AllocaPoint);
+ } else {
+ Function *F = P->getParent()->getParent();
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName() + ".reg2mem",
+ &F->getEntryBlock().front());
+ }
+
+ // Iterate over each operand inserting a store in each predecessor.
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+ assert(II->getParent() != P->getIncomingBlock(i) &&
+ "Invoke edge not supported yet"); (void)II;
+ }
+ new StoreInst(P->getIncomingValue(i), Slot,
+ P->getIncomingBlock(i)->getTerminator());
+ }
+
+ // Insert a load in place of the PHI and replace all uses.
+ BasicBlock::iterator InsertPt = P->getIterator();
+
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
+ P->replaceAllUsesWith(V);
+
+ // Delete PHI.
+ P->eraseFromParent();
+ return Slot;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
new file mode 100644
index 000000000000..78d7474e5b95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -0,0 +1,95 @@
+//===- EscapeEnumerator.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a helper class that enumerates all possible exits from a function,
+// including exception handling.
+//
+//===----------------------------------------------------------------------===//
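+//
+// Typical use is a loop over Next() (a hedged sketch; the constructor's
+// parameters are declared in EscapeEnumerator.h, not shown here):
+//
+//   EscapeEnumerator EE(F);
+//   while (IRBuilder<> *Builder = EE.Next())
+//     Builder->CreateCall(ExitHook);   // ExitHook: placeholder callee
+//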
+
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+static Constant *getDefaultPersonalityFn(Module *M) {
+ LLVMContext &C = M->getContext();
+ Triple T(M->getTargetTriple());
+ EHPersonality Pers = getDefaultEHPersonality(T);
+ return M->getOrInsertFunction(getEHPersonalityName(Pers),
+ FunctionType::get(Type::getInt32Ty(C), true));
+}
+
+IRBuilder<> *EscapeEnumerator::Next() {
+ if (Done)
+ return nullptr;
+
+ // Find all 'return' and 'resume' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = &*StateBB++;
+
+ // Branches and invokes do not escape; only resume and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI);
+ return &Builder;
+ }
+
+ Done = true;
+
+ if (!HandleExceptions)
+ return nullptr;
+
+ if (F.doesNotThrow())
+ return nullptr;
+
+ // Find all 'call' instructions that may throw.
+ SmallVector<Instruction *, 16> Calls;
+ for (BasicBlock &BB : F)
+ for (Instruction &II : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&II))
+ if (!CI->doesNotThrow())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
+ if (!F.hasPersonalityFn()) {
+ Constant *PersFn = getDefaultPersonalityFn(F.getParent());
+ F.setPersonalityFn(PersFn);
+ }
+
+ if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Funclet EH not supported");
+ }
+
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+ changeToInvokeAndSplitBasicBlock(CI, CleanupBB);
+ }
+
+ Builder.SetInsertPoint(RI);
+ return &Builder;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
new file mode 100644
index 000000000000..1328f2f3ec01
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -0,0 +1,597 @@
+//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Function evaluator for LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "evaluator"
+
+using namespace llvm;
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
+
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // Simple global addresses are supported, do not allow dllimport or
+ // thread-local globals.
+ if (auto *GV = dyn_cast<GlobalValue>(C))
+ return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
+
+ // Simple integer, undef, constant aggregate zero, etc are all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantAggregate>(C)) {
+ for (Value *Op : C->operands())
+ if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
+ return false;
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C).second)
+ return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
+}
+
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast to anything other than from one pointer type to
+/// another pointer type, we punt. We basically just support direct accesses to
+/// globals and GEP's of globals. This should be kept up to date with
+/// CommitValueTo.
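+///
+/// For example (illustrative), a direct store to @G, or through a constant
+/// 'getelementptr inbounds' of @G whose first index is zero and whose other
+/// indices stay within the static array bounds, is accepted; anything more
+/// exotic is rejected.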
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce linkage or external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by MutatedMemory have been performed. If we can't decide, return
+/// null.
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return nullptr;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return nullptr; // don't know how to evaluate.
+}
+
+/// Evaluate all instructions beginning at CurInst, returning true if
+/// successful, false if we can't evaluate it. NextBB returns the next BB that
+/// control flows into, or null when we hit a return instruction.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = nullptr;
+
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ Ptr = FoldedPtr;
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
+ // If this is too complex for us to commit, reject it.
+ DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
+ return false;
+ }
+
+ Constant *Val = getVal(SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
+ return false;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
+ Ptr = FoldedPtr;
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
+ }
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(CI->getOperand(0)),
+ CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getExtractValue(
+ getVal(EVI->getAggregateOperand()), EVI->getIndices());
+ DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getInsertValue(
+ getVal(IVI->getAggregateOperand()),
+ getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
+ DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(*i));
+ InstResult =
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
+ Ptr = FoldedPtr;
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
+ InstResult = ComputeLoadResult(Ptr);
+ if (!InstResult) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
+ Type *Ty = AI->getAllocatedType();
+ AllocaTmps.push_back(
+ make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+ UndefValue::get(Ty), AI->getName()));
+ InstResult = AllocaTmps.back().get();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(&*CurInst);
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty()) {
+ DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
+ return false;
+ }
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = GV->getValueType();
+ if (!Size->isMinusOne() &&
+ Size->getValue().getLimitedValue() >=
+ DL.getTypeStoreSize(ElemTy)) {
+ Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
+ }
+ // Continue even if we do nothing.
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->isInterposable()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
+
+ SmallVector<Constant*, 8> Formals;
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
+ InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
+ } else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
+ return false;
+ }
+
+ Constant *RetVal = nullptr;
+ // Execute the call, if successful, use the return value.
+ ValueStack.emplace_back();
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ ValueStack.pop_back();
+ InstResult = RetVal;
+
+ if (InstResult) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: "
+ << *InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NextBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NextBB = SI->findCaseValue(Val)->getCaseSuccessor();
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NextBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = nullptr;
+ } else {
+ // resume, unreachable, or another terminator we cannot handle.
+ DEBUG(dbgs() << "Can not handle terminator.");
+ return false; // Cannot handle this terminator.
+ }
+
+ // We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
+ return true;
+ } else {
+ // Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI))
+ InstResult = FoldedInstResult;
+
+ setVal(&*CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ return true;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the formal arguments for the
+/// function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (is_contained(CallStack, F))
+ return false;
+
+ CallStack.push_back(F);
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(&*AI, ActualArgs[ArgNo]);
+
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = &F->front();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (!NextBB) {
+ // Successfully running until there's no next block means that we found
+ // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB).second)
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = nullptr;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
new file mode 100644
index 000000000000..435eff3bef47
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -0,0 +1,482 @@
+//===- FlattenCFG.cpp - Code to perform CFG flattening ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Reduce conditional branches in CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "flattencfg"
+
+namespace {
+class FlattenCFGOpt {
+ AliasAnalysis *AA;
+ /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches.
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
+ /// \brief If \param BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if the two if-regions
+ /// contain identical instructions.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
+ /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
+ /// are from two if-regions whose entry blocks are \p Head1 and \p
+ /// Head2. \returns true if \p Block1 and \p Block2 contain identical
+ /// instructions, and have no memory reference alias with \p Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2);
+
+public:
+ FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
+ bool run(BasicBlock *BB);
+};
+}
+
+/// If \param [in] BB has more than one predecessor that ends with a
+/// conditional branch, attempt to use parallel-and/or for the branch
+/// condition. \returns true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp10, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %if.end
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors; both of them end with a conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %if.end
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// The current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where, BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains an unconditional branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of the conditional
+/// branches. In Case 1, \param BB (BB4) has a predecessor (BB3) that ends
+/// with an unconditional branch. In Case 2, \param BB (BB3) has only
+/// conditional branches as its predecessors.
+///
+bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = nullptr;
+ BasicBlock *FirstCondBlock = nullptr;
+ BasicBlock *UnCondBlock = nullptr;
+ int Idx = -1;
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block; it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4), and its address should not be taken.
+ // There should be only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // The condition's only use should be the branch instruction.
+ Value *PC = PBI->getCondition();
+ if (!PC || !PC->hasOneUse())
+ return false;
+
+ if (PP && Preds.count(PP)) {
+ // These are internal condition blocks to be merged from, e.g.,
+ // BB2 in both cases.
+ // Should not be address-taken.
+ if (Pred->hasAddressTaken())
+ return false;
+
+ // Instructions in the internal condition blocks should be safe
+ // to hoist up.
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
+ BI != BE;) {
+ Instruction *CI = &*BI++;
+ if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+ } else {
+ // This is the condition block to be merged into, e.g. BB1 in
+ // both cases.
+ if (FirstCondBlock)
+ return false;
+ FirstCondBlock = Pred;
+ }
+
+ // Find whether BB is uniformly on the true (or false) path
+ // for all of its predecessors.
+ BasicBlock *PS1 = PBI->getSuccessor(0);
+ BasicBlock *PS2 = PBI->getSuccessor(1);
+ BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+ int CIdx = (PS1 == BB) ? 0 : 1;
+
+ if (Idx == -1)
+ Idx = CIdx;
+ else if (CIdx != Idx)
+ return false;
+
+ // PS is the successor which is not BB. Check successors to identify
+ // the last conditional branch.
+ if (Preds.count(PS) == 0) {
+ // Case 2.
+ LastCondBlock = Pred;
+ } else {
+ // Case 1
+ BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+ if (BPS && BPS->isUnconditional()) {
+ // Case 1: PS (BB3) should end with an unconditional branch.
+ LastCondBlock = Pred;
+ }
+ }
+ }
+
+ if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+ return false;
+
+ TerminatorInst *TBB = LastCondBlock->getTerminator();
+ BasicBlock *PS1 = TBB->getSuccessor(0);
+ BasicBlock *PS2 = TBB->getSuccessor(1);
+ BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
+ BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
+
+ // If PS1 does not unconditionally jump to PS2, check whether PS2
+ // unconditionally jumps to PS1 and, if so, attempt branch inversion.
+ if (!PBI1 || !PBI1->isUnconditional() ||
+ (PS1->getTerminator()->getSuccessor(0) != PS2)) {
+ // Check whether PS2 jumps into PS1.
+ if (!PBI2 || !PBI2->isUnconditional() ||
+ (PS2->getTerminator()->getSuccessor(0) != PS1))
+ return false;
+
+ // Do branch inversion.
+ BasicBlock *CurrBlock = LastCondBlock;
+ bool EverChanged = false;
+ for (;CurrBlock != FirstCondBlock;
+ CurrBlock = CurrBlock->getSinglePredecessor()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI)
+ continue;
+
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+ if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
+ CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
+ BI->swapSuccessors();
+ EverChanged = true;
+ }
+ }
+ return EverChanged;
+ }
+
+ // PS1 must end with an unconditional branch.
+ if (!PBI1 || !PBI1->isUnconditional())
+ return false;
+
+ // PS2 should not contain a PHI node.
+ PHI = dyn_cast<PHINode>(PS2->begin());
+ if (PHI)
+ return false;
+
+ // Do the transformation.
+ BasicBlock *CB;
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+ bool Iteration = true;
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Value *PC = PBI->getCondition();
+
+ do {
+ CB = PBI->getSuccessor(1 - Idx);
+ // Delete the conditional branch.
+ FirstCondBlock->getInstList().pop_back();
+ FirstCondBlock->getInstList()
+ .splice(FirstCondBlock->end(), CB->getInstList());
+ PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ // Merge conditions.
+ Builder.SetInsertPoint(PBI);
+ Value *NC;
+ if (Idx == 0)
+ // Case 2, use parallel or.
+ NC = Builder.CreateOr(PC, CC);
+ else
+ // Case 1, use parallel and.
+ NC = Builder.CreateAnd(PC, CC);
+
+ PBI->replaceUsesOfWith(CC, NC);
+ PC = NC;
+ if (CB == LastCondBlock)
+ Iteration = false;
+ // Remove internal conditional branches.
+ CB->dropAllReferences();
+ // Make CB unreachable and let downstream passes delete the block.
+ new UnreachableInst(CB->getContext(), CB);
+ } while (Iteration);
+
+ DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ return true;
+}
+
+/// Compare blocks from two if-regions, where \param Head1 is the entry of the
+/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
+/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
+/// in the 2nd if-region to compare. \returns true if \param Block1 and \param
+/// Block2 have identical instructions and do not have memory reference alias
+/// with \param Head2.
+///
+bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1,
+ BasicBlock *Block2) {
+ TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PBI2 = &Head2->front();
+
+ bool eq1 = (Block1 == Head1);
+ bool eq2 = (Block2 == Head2);
+ if (eq1 || eq2) {
+ // An empty then-path or else-path.
+ return (eq1 == eq2);
+ }
+
+ // Check whether instructions in Block1 and Block2 are identical
+ // and do not alias with instructions in Head2.
+ BasicBlock::iterator iter1 = Block1->begin();
+ BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
+ BasicBlock::iterator iter2 = Block2->begin();
+ BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
+
+ while (1) {
+ if (iter1 == end1) {
+ if (iter2 != end2)
+ return false;
+ break;
+ }
+
+ if (!iter1->isIdenticalTo(&*iter2))
+ return false;
+
+ // Illegal to remove instructions with side effects except
+ // non-volatile stores.
+ if (iter1->mayHaveSideEffects()) {
+ Instruction *CurI = &*iter1;
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ if (!SI || SI->isVolatile())
+ return false;
+ }
+
+ // For simplicity and speed, we skip the data-dependency check entirely by
+ // rejecting any instruction that reads from memory.
+ if (iter1->mayReadFromMemory())
+ return false;
+
+ if (iter1->mayWriteToMemory()) {
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+ // Check alias with Head2.
+ if (!AA || AA->alias(&*iter1, &*BI))
+ return false;
+ }
+ }
+ }
+ ++iter1;
+ ++iter2;
+ }
+
+ return true;
+}
+
+/// Check whether \param BB is the merge block of an if-region. If so, check
+/// whether there exists an adjacent if-region upstream whose blocks contain
+/// identical instructions and can be legally merged with this one. \returns
+/// true if the two if-regions are merged.
+///
+/// From:
+/// if (a)
+/// statement;
+/// if (b)
+/// statement;
+///
+/// To:
+/// if (a || b)
+/// statement;
+///
+bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
+ BasicBlock *IfTrue2, *IfFalse2;
+ Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+ if (!CInst2)
+ return false;
+
+ BasicBlock *SecondEntryBlock = CInst2->getParent();
+ if (SecondEntryBlock->hasAddressTaken())
+ return false;
+
+ BasicBlock *IfTrue1, *IfFalse1;
+ Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+ if (!CInst1)
+ return false;
+
+ BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+ // Either then-path or else-path should be empty.
+ if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+ return false;
+ if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+ return false;
+
+ TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = &SecondEntryBlock->front();
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
+ IfTrue2))
+ return false;
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
+ IfFalse2))
+ return false;
+
+ // Check whether \param SecondEntryBlock has side-effect and is safe to
+ // speculate.
+ for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
+ Instruction *CI = &*BI;
+ if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+
+ // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+ FirstEntryBlock->getInstList().pop_back();
+ FirstEntryBlock->getInstList()
+ .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(PBI);
+ Value *NC = Builder.CreateOr(CInst1, CC);
+ PBI->replaceUsesOfWith(CC, NC);
+ Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+ // Remove IfTrue1
+ if (IfTrue1 != FirstEntryBlock) {
+ IfTrue1->dropAllReferences();
+ IfTrue1->eraseFromParent();
+ }
+
+ // Remove IfFalse1
+ if (IfFalse1 != FirstEntryBlock) {
+ IfFalse1->dropAllReferences();
+ IfFalse1->eraseFromParent();
+ }
+
+ // Remove \param SecondEntryBlock
+ SecondEntryBlock->dropAllReferences();
+ SecondEntryBlock->eraseFromParent();
+ DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ return true;
+}
+
+bool FlattenCFGOpt::run(BasicBlock *BB) {
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ IRBuilder<> Builder(BB);
+
+ if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
+ return true;
+ return false;
+}
+
+/// FlattenCFG - This function flattens a CFG. For example, it uses the
+/// parallel-and and parallel-or modes to collapse if-conditions and to merge
+/// if-regions with identical statements.
+///
+bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
+ return FlattenCFGOpt(AA).run(BB);
+}
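+
+// Illustrative usage (a sketch, not part of this change): a pass would
+// typically call FlattenCFG to a fixed point, since one successful
+// flattening can expose another. Only the entry point above is relied upon;
+// the surrounding pass boilerplate is assumed:
+//
+//   bool Changed = false, LocalChange = true;
+//   while (LocalChange) {
+//     LocalChange = false;
+//     for (BasicBlock &BB : F)
+//       if (FlattenCFG(&BB, AA)) {
+//         LocalChange = true;
+//         break; // The CFG was mutated; restart the scan.
+//       }
+//     Changed |= LocalChange;
+//   }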
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
new file mode 100644
index 000000000000..4a2be3a53176
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -0,0 +1,923 @@
+//===- FunctionComparator.cpp - Function Comparator -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionComparator and GlobalNumberState classes
+// which are used by the MergeFunctions pass for comparing functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "functioncomparator"
+
+int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
+ if (L < R) return -1;
+ if (L > R) return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
+ if ((int)L < (int)R) return -1;
+ if ((int)L > (int)R) return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R)) return 1;
+ if (R.ugt(L)) return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
+ return Res;
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
+ // Prevent heavy comparison, compare sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare strings lexicographically only when it is necessary: only when
+ // strings are equal in size.
+ return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeList L,
+ const AttributeList R) const {
+ if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
+ return Res;
+
+ for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
+ AttributeSet LAS = L.getAttributes(i);
+ AttributeSet RAS = R.getAttributes(i);
+ AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
+ AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpRangeMetadata(const MDNode *L,
+ const MDNode *R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcast to
+/// the type of R.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L,
+ const Constant *R) const {
+
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+ // Check whether types are bitcastable. This part is essentially a
+ // refactored Type::canLosslesslyBitCastTo, but instead of returning only
+ // true/false, we also encode in the result which type is "less" for us.
+ int TypesRes = cmpTypes(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR is a value of first-class type. Return the result
+ // of comparing the types.
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getBitWidth();
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getBitWidth();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR is a vector.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR aren't vectors, nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+ // endianness. This isn't a problem though, because the endianness of a
+ // module will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInts(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloats(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for(BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
+ }
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
+ uint64_t LNumber = GlobalNumbers->getNumber(L);
+ uint64_t RNumber = GlobalNumbers->getNumber(R);
+ return cmpNumbers(LNumber, RNumber);
+}
+
+/// cmpTypes - Compares two types and defines a total ordering over the set
+/// of types.
+/// See method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
+
+ if (TyL == TyR)
+ return 0;
+
+ if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+ return Res;
+
+ switch (TyL->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ LLVM_FALLTHROUGH;
+ case Type::IntegerTyID:
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ // TyL == TyR would have returned 0 earlier, because types are uniqued.
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ case Type::TokenTyID:
+ return 0;
+
+ case Type::PointerTyID: {
+ assert(PTyL && PTyR && "Both types must be pointers here.");
+ return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+ }
+
+ case Type::StructTyID: {
+ StructType *STyL = cast<StructType>(TyL);
+ StructType *STyR = cast<StructType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+ if (STyL->isPacked() != STyR->isPacked())
+ return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+ for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::FunctionTyID: {
+ FunctionType *FTyL = cast<FunctionType>(TyL);
+ FunctionType *FTyR = cast<FunctionType>(TyR);
+ if (FTyL->getNumParams() != FTyR->getNumParams())
+ return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+ if (FTyL->isVarArg() != FTyR->isVarArg())
+ return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+ return Res;
+
+ for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::ArrayTyID:
+ case Type::VectorTyID: {
+ auto *STyL = cast<SequentialType>(TyL);
+ auto *STyR = cast<SequentialType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+ return cmpTypes(STyL->getElementType(), STyR->getElementType());
+ }
+ }
+}
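+
+// Worked example of the ordering above (illustrative): i32 and i64 compare
+// by bit width, so cmpTypes(i32, i64) < 0. A pointer in address space 0 is
+// first mapped to the target's intptr type, so on a target with 64-bit
+// pointers, cmpTypes(i8*, i64) == 0 even though the types differ.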
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R,
+ bool &needToCmpOperands) const {
+ needToCmpOperands = true;
+ if (int Res = cmpValues(L, R))
+ return Res;
+
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to cmpTypes.
+ // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
+ // * because of the above, we don't test for the tail bit on calls later on.
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
+ needToCmpOperands = false;
+ const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
+ if (int Res =
+ cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
+ return Res;
+ return cmpGEPs(GEPL, GEPR);
+ }
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpTypes(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type.
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
+
+ // Check special state that is a part of some instructions.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ if (int Res = cmpNumbers(LI->getSyncScopeID(),
+ cast<LoadInst>(R)->getSyncScopeID()))
+ return Res;
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSyncScopeID(),
+ cast<StoreInst>(R)->getSyncScopeID());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<CallInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
+ cast<InvokeInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
+ return Res;
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ return 0;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSyncScopeID(),
+ cast<FenceInst>(R)->getSyncScopeID());
+ }
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->isWeak(),
+ cast<AtomicCmpXchgInst>(R)->isWeak()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSyncScopeID(),
+ cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpOrderings(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSyncScopeID(),
+ cast<AtomicRMWInst>(R)->getSyncScopeID());
+ }
+ if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
+ const PHINode *PNR = cast<PHINode>(R);
+ // Ensure that in addition to the incoming values being identical
+ // (checked by the caller of this function), the incoming blocks
+ // are also identical.
+ for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
+ if (int Res =
+ cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
+ return Res;
+ }
+ }
+ return 0;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) const {
+
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
+ return Res;
+
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
+
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
+ }
+
+ return 0;
+}
+
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+ // InlineAsm instances are uniqued. If they are the same pointer, obviously
+ // they are the same; otherwise, compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ llvm_unreachable("InlineAsm blocks were not uniqued.");
+ return 0;
+}
+
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
+ // Catch self-reference case.
+ if (L == FnL) {
+ if (R == FnR)
+ return 0;
+ return -1;
+ }
+ if (R == FnR) {
+ if (L == FnL)
+ return 0;
+ return 1;
+ }
+
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
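+
+// Worked example of the serial-number scheme above (illustrative): if the
+// left function uses values in the order a, b, a and the right function uses
+// x, y, y, then a/x both receive serial number 0 and b/y both receive 1. The
+// third use then compares 0 (a) against 1 (y) and reports a mismatch: the
+// two functions consume their values in different patterns.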
+
+// Test whether two basic blocks have equivalent behaviour.
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) const {
+ BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
+ BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
+
+ do {
+ bool needToCmpOperands = true;
+ if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
+ return Res;
+ if (needToCmpOperands) {
+ assert(InstL->getNumOperands() == InstR->getNumOperands());
+
+ for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
+ Value *OpL = InstL->getOperand(i);
+ Value *OpR = InstR->getOperand(i);
+ if (int Res = cmpValues(OpL, OpR))
+ return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
+ }
+ }
+
+ ++InstL;
+ ++InstR;
+ } while (InstL != InstLE && InstR != InstRE);
+
+ if (InstL != InstLE && InstR == InstRE)
+ return 1;
+ if (InstL == InstLE && InstR != InstRE)
+ return -1;
+ return 0;
+}
+
+int FunctionComparator::compareSignature() const {
+ if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
+ return Res;
+
+ if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
+ return Res;
+
+ if (FnL->hasGC()) {
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
+ return Res;
+
+ if (FnL->hasSection()) {
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
+ return Res;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
+ return Res;
+
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
+ return Res;
+
+ assert(FnL->arg_size() == FnR->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
+ ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
+ llvm_unreachable("Arguments repeat!");
+ }
+ return 0;
+}
+
+// Test whether the two functions have equivalent behaviour.
+int FunctionComparator::compare() {
+ beginCompare();
+
+ if (int Res = compareSignature())
+ return Res;
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
+ SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
+
+ FnLBBs.push_back(&FnL->getEntryBlock());
+ FnRBBs.push_back(&FnR->getEntryBlock());
+
+ VisitedBBs.insert(FnLBBs[0]);
+ while (!FnLBBs.empty()) {
+ const BasicBlock *BBL = FnLBBs.pop_back_val();
+ const BasicBlock *BBR = FnRBBs.pop_back_val();
+
+ if (int Res = cmpValues(BBL, BBR))
+ return Res;
+
+ if (int Res = cmpBasicBlocks(BBL, BBR))
+ return Res;
+
+ const TerminatorInst *TermL = BBL->getTerminator();
+ const TerminatorInst *TermR = BBR->getTerminator();
+
+ assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
+ for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
+ continue;
+
+ FnLBBs.push_back(TermL->getSuccessor(i));
+ FnRBBs.push_back(TermR->getSuccessor(i));
+ }
+ }
+ return 0;
+}
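+
+// Illustrative usage (a sketch, not part of this change): MergeFunctions
+// drives the comparator roughly as below. The constructor shape (two
+// functions plus a shared GlobalNumberState) is assumed from the header,
+// which is not part of this hunk:
+//
+//   GlobalNumberState GlobalNumbers;
+//   FunctionComparator FC(F1, F2, &GlobalNumbers);
+//   if (FC.compare() == 0) {
+//     // F1 and F2 are behaviourally equivalent and are candidates for
+//     // merging.
+//   }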
+
+namespace {
+
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to
+// hashing a buffer of 64-bit ints all at once, but the entire input does not
+// need to be available up front. This interface is necessary for
+// functionHash because it needs to accumulate the hash as the structure of
+// the function is traversed, without saving these values to an intermediate
+// buffer. This form of hashing is not often needed, as usually the object to
+// hash is just read from a buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+ void add(uint64_t V) {
+ Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ }
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+ // accumulating the hash of the function "structure" (BB and opcode
+ // sequence).
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+ H.add(45798);
+ for (auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const TerminatorInst *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
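+
+// Illustrative use of the hash (a sketch, not part of this change): because
+// functionHash ignores operands, it works as a cheap pre-filter; only
+// functions that land in the same bucket need the full compare():
+//
+//   DenseMap<FunctionComparator::FunctionHash, SmallVector<Function *, 2>>
+//       Buckets;
+//   for (Function &F : M)
+//     if (!F.isDeclaration())
+//       Buckets[FunctionComparator::functionHash(F)].push_back(&F);
+//   // Only functions sharing a bucket are pairwise compared.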
+
+
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
new file mode 100644
index 000000000000..a98d07237b47
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -0,0 +1,262 @@
+//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionImportGlobalProcessing class, used
+// to perform the necessary global value handling for function importing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+using namespace llvm;
+
+/// Checks if we should import SGV as a definition, otherwise import as a
+/// declaration.
+bool FunctionImportGlobalProcessing::doImportAsDefinition(
+ const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
+
+ // For an alias, we tie the definition to the aliased base object. Extract
+ // it and recurse.
+ if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
+ if (GA->isInterposable())
+ return false;
+ const GlobalObject *GO = GA->getBaseObject();
+ if (!GO->hasLinkOnceODRLinkage())
+ return false;
+ return FunctionImportGlobalProcessing::doImportAsDefinition(
+ GO, GlobalsToImport);
+ }
+ // Only import the globals requested for importing.
+ if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
+ return true;
+ // Otherwise no.
+ return false;
+}
+
+bool FunctionImportGlobalProcessing::doImportAsDefinition(
+ const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
+ GlobalsToImport);
+}
+
+bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
+ const GlobalValue *SGV) {
+ assert(SGV->hasLocalLinkage());
+ // Both the imported references and the original local variable must
+ // be promoted.
+ if (!isPerformingImport() && !isModuleExporting())
+ return false;
+
+ if (isPerformingImport()) {
+ assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
+ !isNonRenamableLocal(*SGV)) &&
+ "Attempting to promote non-renamable local");
+ // We don't know for sure yet if we are importing this value (as either
+ // a reference or a def), since we are simply walking all values in the
+ // module. But by necessity if we end up importing it and it is local,
+ // it must be promoted, so unconditionally promote all values in the
+ // importing module.
+ return true;
+ }
+
+ // When exporting, consult the index. We can have more than one local
+ // with the same GUID, in the case of same-named locals in different but
+ // same-named source files that were compiled in their respective directories
+ // (so the source file name and resulting GUID is the same). Find the one
+ // in this module.
+ auto Summary = ImportIndex.findSummaryInModule(
+ SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
+ assert(Summary && "Missing summary for global value when exporting");
+ auto Linkage = Summary->linkage();
+ if (!GlobalValue::isLocalLinkage(Linkage)) {
+ assert(!isNonRenamableLocal(*SGV) &&
+ "Attempting to promote non-renamable local");
+ return true;
+ }
+
+ return false;
+}
+
+#ifndef NDEBUG
+bool FunctionImportGlobalProcessing::isNonRenamableLocal(
+ const GlobalValue &GV) const {
+ if (!GV.hasLocalLinkage())
+ return false;
+ // This needs to stay in sync with the logic in buildModuleSummaryIndex.
+ if (GV.hasSection())
+ return true;
+ if (Used.count(const_cast<GlobalValue *>(&GV)))
+ return true;
+ return false;
+}
+#endif
+
+std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
+ bool DoPromote) {
+ // For locals that must be promoted to global scope, ensure that
+ // the promoted name uniquely identifies the copy in the original module,
+ // using the ID assigned during combined index creation. When importing,
+ // we rename all locals (not just those that are promoted) in order to
+ // avoid naming conflicts between locals imported from different modules.
+ if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
+ return ModuleSummaryIndex::getGlobalNameForLocal(
+ SGV->getName(),
+ ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
+ return SGV->getName();
+}
+
+GlobalValue::LinkageTypes
+FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
+ bool DoPromote) {
+ // Any local variable that is referenced by an exported function needs
+ // to be promoted to global scope. Since we don't currently know which
+ // functions reference which local variables/functions, we must treat
+ // all as potentially exported if this module is exporting anything.
+ if (isModuleExporting()) {
+ if (SGV->hasLocalLinkage() && DoPromote)
+ return GlobalValue::ExternalLinkage;
+ return SGV->getLinkage();
+ }
+
+ // Otherwise, if we aren't importing, no linkage change is needed.
+ if (!isPerformingImport())
+ return SGV->getLinkage();
+
+ switch (SGV->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ // External definitions are converted to available_externally
+ // definitions upon import, so that they are available for inlining
+ // and/or optimization, but are turned into declarations later
+ // during the EliminateAvailableExternally pass.
+ if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ // An imported external declaration stays external.
+ return SGV->getLinkage();
+
+ case GlobalValue::AvailableExternallyLinkage:
+ // An imported available_externally definition converts
+ // to external if imported as a declaration.
+ if (!doImportAsDefinition(SGV))
+ return GlobalValue::ExternalLinkage;
+ // An imported available_externally declaration stays that way.
+ return SGV->getLinkage();
+
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ // These both stay the same when importing the definition.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakAnyLinkage:
+ // Can't import weak_any definitions correctly, or we might change the
+ // program semantics, since the linker will pick the first weak_any
+ // definition and importing would change the order they are seen by the
+ // linker. The module linking caller needs to enforce this.
+ assert(!doImportAsDefinition(SGV));
+ // If imported as a declaration, it becomes external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::WeakODRLinkage:
+ // For weak_odr linkage, there is a guarantee that all copies will be
+ // equivalent, so the issue described above for weak_any does not exist,
+ // and the definition can be imported. It can be treated similarly
+ // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+
+ case GlobalValue::AppendingLinkage:
+ // It would be incorrect to import an appending linkage variable,
+ // since it would cause global constructors/destructors to be
+ // executed multiple times. This should have already been handled
+ // by linkIfNeeded, and we will assert in shouldLinkFromSource
+ // if we try to import, so we simply return AppendingLinkage.
+ return GlobalValue::AppendingLinkage;
+
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ // If we are promoting the local to global scope, it is handled
+ // similarly to a normal externally visible global.
+ if (DoPromote) {
+ if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ return GlobalValue::AvailableExternallyLinkage;
+ else
+ return GlobalValue::ExternalLinkage;
+ }
+ // A non-promoted imported local definition stays local.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+
+ case GlobalValue::ExternalWeakLinkage:
+ // External weak doesn't apply to definitions, must be a declaration.
+ assert(!doImportAsDefinition(SGV));
+ // Linkage stays external_weak.
+ return SGV->getLinkage();
+
+ case GlobalValue::CommonLinkage:
+ // Linkage stays common on definitions.
+ // The ThinLTO pass will eventually force-import their definitions.
+ return SGV->getLinkage();
+ }
+
+ llvm_unreachable("unknown linkage type");
+}
+
+void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
+ bool DoPromote = false;
+ if (GV.hasLocalLinkage() &&
+ ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+ // Once we change the name or linkage it is difficult to determine
+ // again whether we should promote since shouldPromoteLocalToGlobal needs
+ // to locate the summary (based on GUID from name and linkage). Therefore,
+ // use DoPromote result saved above.
+ GV.setName(getName(&GV, DoPromote));
+ GV.setLinkage(getLinkage(&GV, DoPromote));
+ if (!GV.hasLocalLinkage())
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ } else
+ GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
+
+ // Remove functions imported as available externally defs from comdats,
+ // as this is a declaration for the linker, and will be dropped eventually.
+ // It is illegal for comdats to contain declarations.
+ auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
+ // The IRMover should not have placed any imported declarations in
+ // a comdat, so the only declaration that should be in a comdat
+ // at this point would be a definition imported as available_externally.
+ assert(GO->hasAvailableExternallyLinkage() &&
+ "Expected comdat on definition (possibly available external)");
+ GO->setComdat(nullptr);
+ }
+}
+
+void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
+ for (GlobalVariable &GV : M.globals())
+ processGlobalForThinLTO(GV);
+ for (Function &SF : M)
+ processGlobalForThinLTO(SF);
+ for (GlobalAlias &GA : M.aliases())
+ processGlobalForThinLTO(GA);
+}
+
+bool FunctionImportGlobalProcessing::run() {
+ processGlobalsForThinLTO();
+ return false;
+}
+
+bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
+ SetVector<GlobalValue *> *GlobalsToImport) {
+ FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
+ return ThinLTOProcessing.run();
+}
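+
+// Illustrative usage (a sketch, not part of this change): a ThinLTO backend
+// renames/promotes a module against the combined summary index before
+// cross-module importing. Passing a null GlobalsToImport is assumed to
+// denote the rename/export-only case, per isPerformingImport():
+//
+//   // Rename/promote locals in M prior to importing:
+//   renameModuleForThinLTO(M, CombinedIndex, /*GlobalsToImport=*/nullptr);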
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 000000000000..245fefb38ee8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,196 @@
+//===-- GlobalStatus.cpp - Compute status info for globals ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+/// Return the stronger of the two orderings. If the two orderings are acquire
+/// and release, then return AcquireRelease.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
+ (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
+ return AtomicOrdering::AcquireRelease;
+ return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
+}
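+
+// For example, strongerOrdering(Acquire, Release) yields AcquireRelease via
+// the special case above; otherwise the stronger ordering is simply the one
+// with the larger enumerator, e.g. strongerOrdering(Monotonic,
+// SequentiallyConsistent) yields SequentiallyConsistent.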
+
+/// It is safe to destroy a constant iff it is itself only used by other
+/// constants. Note that constants cannot be cyclic, so this test is easy to
+/// implement recursively.
+///
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C))
+ return false;
+
+ if (isa<ConstantData>(C))
+ return false;
+
+ for (const User *U : C->users())
+ if (const Constant *CU = dyn_cast<Constant>(U)) {
+ if (!isSafeToDestroyConstant(CU))
+ return false;
+ } else
+ return false;
+ return true;
+}
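+
+// A typical caller pattern (sketch): check before destroying a dead constant
+// user of a global, e.g.
+//
+//   if (isSafeToDestroyConstant(CE))
+//     const_cast<Constant *>(CE)->destroyConstant();
+//
+// GlobalValues and uniqued ConstantData nodes are rejected above because
+// they may be shared or referenced externally.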
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSetImpl<const PHINode *> &PhiUsers) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (GV->isExternallyInitialized())
+ GS.StoredType = GlobalStatus::StoredOnce;
+
+ for (const Use &U : V->uses()) {
+ const User *UR = U.getUser();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
+ GS.HasNonInstructionUser = true;
+
+      // If the result of the constantexpr isn't a pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType()))
+ return true;
+
+ if (analyzeGlobalAux(CE, GS, PhiUsers))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (!GS.AccessingFunction)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (GV->hasInitializer() && StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<SelectInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PhiUsers.insert(PN).second) // Not already visited.
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V)
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (auto C = ImmutableCallSite(I)) {
+ if (!C.isCallee(&U))
+ return true;
+ GS.IsLoaded = true;
+ } else {
+        return true; // Any other non-load instruction might take the address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(UR)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+GlobalStatus::GlobalStatus() = default;
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ SmallPtrSet<const PHINode *, 16> PhiUsers;
+ return analyzeGlobalAux(V, GS, PhiUsers);
+}
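+
+// Illustrative usage (a minimal sketch; GV is assumed to be a GlobalVariable
+// of interest):
+//
+//   GlobalStatus GS;
+//   if (!GlobalStatus::analyzeGlobal(&GV, GS) &&
+//       GS.StoredType == GlobalStatus::StoredOnce)
+//     ; // GV is only ever stored GS.StoredOnceValue after initialization.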
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
new file mode 100644
index 000000000000..b8c12ad5ea84
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -0,0 +1,205 @@
+//===-- ImportedFunctionsInliningStatistics.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Generating inliner statistics for imported functions, mostly useful for
+// ThinLTO.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iomanip>
+#include <sstream>
+using namespace llvm;
+
+ImportedFunctionsInliningStatistics::InlineGraphNode &
+ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
+
+ auto &ValueLookup = NodesMap[F.getName()];
+ if (!ValueLookup) {
+ ValueLookup = llvm::make_unique<InlineGraphNode>();
+ ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+ }
+ return *ValueLookup;
+}
+
+void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
+ const Function &Callee) {
+
+ InlineGraphNode &CallerNode = createInlineGraphNode(Caller);
+ InlineGraphNode &CalleeNode = createInlineGraphNode(Callee);
+ CalleeNode.NumberOfInlines++;
+
+ if (!CallerNode.Imported && !CalleeNode.Imported) {
+    // Direct inline from a not-imported callee into a not-imported caller,
+    // so we don't have to add this edge to the graph. This still counts the
+    // inline, which is useful when gathering inliner statistics for a
+    // compile step that has no imported functions; in that case the graph
+    // would be empty.
+ CalleeNode.NumberOfRealInlines++;
+ return;
+ }
+
+ CallerNode.InlinedCallees.push_back(&CalleeNode);
+ if (!CallerNode.Imported) {
+    // We could avoid the second lookup, but it would make the code much uglier.
+ auto It = NodesMap.find(Caller.getName());
+ assert(It != NodesMap.end() && "The node should be already there.");
+    // Save Caller as a starting node for traversal. The string has to be one
+    // from the map because Caller can disappear (and its name with it).
+ NonImportedCallers.push_back(It->first());
+ }
+}
+
+void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
+ ModuleName = M.getName();
+ for (const auto &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
+ AllFunctions++;
+ ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
+ }
+}
+
+static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
+ const char *PercentageOfMsg,
+ bool LineEnd = true) {
+ double Result = 0;
+ if (All != 0)
+ Result = 100 * static_cast<double>(Fraction) / All;
+
+ std::stringstream Str;
+ Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result
+ << "% of " << PercentageOfMsg << "]";
+ if (LineEnd)
+ Str << "\n";
+ return Str.str();
+}
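+
+// For example, getStatString("inlined functions", 10, 40, "all functions")
+// returns "inlined functions: 10 [25% of all functions]\n".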
+
+void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
+ calculateRealInlines();
+ NonImportedCallers.clear();
+
+ int32_t InlinedImportedFunctionsCount = 0;
+ int32_t InlinedNotImportedFunctionsCount = 0;
+
+ int32_t InlinedImportedFunctionsToImportingModuleCount = 0;
+ int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0;
+
+ const auto SortedNodes = getSortedNodes();
+ std::string Out;
+ Out.reserve(5000);
+ raw_string_ostream Ostream(Out);
+
+ Ostream << "------- Dumping inliner stats for [" << ModuleName
+ << "] -------\n";
+
+ if (Verbose)
+ Ostream << "-- List of inlined functions:\n";
+
+ for (const auto &Node : SortedNodes) {
+ assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines);
+ if (Node->second->NumberOfInlines == 0)
+ continue;
+
+ if (Node->second->Imported) {
+ InlinedImportedFunctionsCount++;
+ InlinedImportedFunctionsToImportingModuleCount +=
+ int(Node->second->NumberOfRealInlines > 0);
+ } else {
+ InlinedNotImportedFunctionsCount++;
+ InlinedNotImportedFunctionsToImportingModuleCount +=
+ int(Node->second->NumberOfRealInlines > 0);
+ }
+
+ if (Verbose)
+ Ostream << "Inlined "
+ << (Node->second->Imported ? "imported " : "not imported ")
+ << "function [" << Node->first() << "]"
+ << ": #inlines = " << Node->second->NumberOfInlines
+ << ", #inlines_to_importing_module = "
+ << Node->second->NumberOfRealInlines << "\n";
+ }
+
+ auto InlinedFunctionsCount =
+ InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount;
+ auto NotImportedFuncCount = AllFunctions - ImportedFunctions;
+ auto ImportedNotInlinedIntoModule =
+ ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount;
+
+ Ostream << "-- Summary:\n"
+ << "All functions: " << AllFunctions
+ << ", imported functions: " << ImportedFunctions << "\n"
+ << getStatString("inlined functions", InlinedFunctionsCount,
+ AllFunctions, "all functions")
+ << getStatString("imported functions inlined anywhere",
+ InlinedImportedFunctionsCount, ImportedFunctions,
+ "imported functions")
+ << getStatString("imported functions inlined into importing module",
+ InlinedImportedFunctionsToImportingModuleCount,
+ ImportedFunctions, "imported functions",
+ /*LineEnd=*/false)
+ << getStatString(", remaining", ImportedNotInlinedIntoModule,
+ ImportedFunctions, "imported functions")
+ << getStatString("non-imported functions inlined anywhere",
+ InlinedNotImportedFunctionsCount,
+ NotImportedFuncCount, "non-imported functions")
+ << getStatString(
+ "non-imported functions inlined into importing module",
+ InlinedNotImportedFunctionsToImportingModuleCount,
+ NotImportedFuncCount, "non-imported functions");
+ Ostream.flush();
+ dbgs() << Out;
+}
+
+void ImportedFunctionsInliningStatistics::calculateRealInlines() {
+  // Remove duplicate callers.
+ std::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ NonImportedCallers.erase(
+ std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
+ NonImportedCallers.end());
+
+ for (const auto &Name : NonImportedCallers) {
+ auto &Node = *NodesMap[Name];
+ if (!Node.Visited)
+ dfs(Node);
+ }
+}
+
+void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) {
+ assert(!GraphNode.Visited);
+ GraphNode.Visited = true;
+ for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) {
+ InlinedFunctionNode->NumberOfRealInlines++;
+ if (!InlinedFunctionNode->Visited)
+ dfs(*InlinedFunctionNode);
+ }
+}
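+
+// Worked example: if a non-imported caller A inlined imported callee B, and
+// B in turn inlined imported callee C, the graph is A -> B -> C with A
+// recorded in NonImportedCallers. The DFS from A increments
+// NumberOfRealInlines for both B and C, since both bodies ultimately end up
+// in the importing module.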
+
+ImportedFunctionsInliningStatistics::SortedNodesTy
+ImportedFunctionsInliningStatistics::getSortedNodes() {
+ SortedNodesTy SortedNodes;
+ SortedNodes.reserve(NodesMap.size());
+ for (const NodesMapTy::value_type& Node : NodesMap)
+ SortedNodes.push_back(&Node);
+
+ std::sort(
+ SortedNodes.begin(), SortedNodes.end(),
+ [&](const SortedNodesTy::value_type &Lhs,
+ const SortedNodesTy::value_type &Rhs) {
+ if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
+ return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
+ if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ return Lhs->second->NumberOfRealInlines >
+ Rhs->second->NumberOfRealInlines;
+ return Lhs->first() < Rhs->first();
+ });
+ return SortedNodes;
+}
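+
+// Illustrative driver sketch (hypothetical names; an inliner is assumed to
+// call recordInline after each successful inline):
+//
+//   ImportedFunctionsInliningStatistics Stats;
+//   Stats.setModuleInfo(M);
+//   // ... run inlining, calling Stats.recordInline(*Caller, *Callee) ...
+//   Stats.dump(/*Verbose=*/true);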
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 000000000000..2a18c140c788
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,2282 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
+ cl::Hidden,
+ cl::desc("Convert noalias attributes to metadata during inlining."));
+
+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+ cl::init(true), cl::Hidden,
+ cl::desc("Convert align attributes to assumptions during inlining."));
+
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
+}
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
+}
+
+namespace {
+ /// A class for recording information about inlining a landing pad.
+ class LandingPadInliningInfo {
+ BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
+ BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
+ LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
+ PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts.
+ SmallVector<Value*, 8> UnwindDestPHIValues;
+
+ public:
+ LandingPadInliningInfo(InvokeInst *II)
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
+ CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (; isa<PHINode>(I); ++I) {
+ // Save the value to use for this edge.
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ CallerLPad = cast<LandingPadInst>(I);
+ }
+
+ /// The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
+ }
+
+ BasicBlock *getInnerResumeDest();
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
+ /// Forward the 'resume' instruction to the caller's landing pad block.
+ /// When the landing pad block has only one predecessor, this is
+ /// a simple branch. When there is more than one predecessor, we need to
+ /// split the landing pad block after the landingpad instruction and jump
+ /// to there.
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
+
+ /// Add incoming-PHI values to the unwind destination block for the given
+ /// basic block, using the values for the original invoke's source block.
+ void addIncomingPHIValuesFor(BasicBlock *BB) const {
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
+ }
+
+ void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
+ BasicBlock::iterator I = dest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *phi = cast<PHINode>(I);
+ phi->addIncoming(UnwindDestPHIValues[i], src);
+ }
+ }
+ };
+} // anonymous namespace
+
+/// Get or create a target for the branch from ResumeInsts.
+BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
+ if (InnerResumeDest) return InnerResumeDest;
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+ Instruction *InsertPoint = &InnerResumeDest->front();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// Forward the 'resume' instruction to the caller's landing pad block.
+/// When the landing pad block has only one predecessor, this is a simple
+/// branch. When there is more than one predecessor, we need to split the
+/// landing pad block after the landingpad instruction and jump to there.
+void LandingPadInliningInfo::forwardResume(
+ ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
+ BasicBlock *Dest = getInnerResumeDest();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src);
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+ RI->eraseFromParent();
+}
+
+/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
+static Value *getParentPad(Value *EHPad) {
+ if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
+ return FPI->getParentPad();
+ return cast<CatchSwitchInst>(EHPad)->getParentPad();
+}
+
+typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy;
+
+/// Helper for getUnwindDestToken that does the descendant-ward part of
+/// the search.
+static Value *getUnwindDestTokenHelper(Instruction *EHPad,
+ UnwindDestMemoTy &MemoMap) {
+ SmallVector<Instruction *, 8> Worklist(1, EHPad);
+
+ while (!Worklist.empty()) {
+ Instruction *CurrentPad = Worklist.pop_back_val();
+ // We only put pads on the worklist that aren't in the MemoMap. When
+ // we find an unwind dest for a pad we may update its ancestors, but
+ // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
+ // so they should never get updated while queued on the worklist.
+ assert(!MemoMap.count(CurrentPad));
+ Value *UnwindDestToken = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
+ if (CatchSwitch->hasUnwindDest()) {
+ UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Catchswitch doesn't have a 'nounwind' variant, and one might be
+ // annotated as "unwinds to caller" when really it's nounwind (see
+ // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
+ // parent's unwind dest from this. We can check its catchpads'
+ // descendants, since they might include a cleanuppad with an
+ // "unwinds to caller" cleanupret, which can be trusted.
+ for (auto HI = CatchSwitch->handler_begin(),
+ HE = CatchSwitch->handler_end();
+ HI != HE && !UnwindDestToken; ++HI) {
+ BasicBlock *HandlerBlock = *HI;
+ auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
+ for (User *Child : CatchPad->users()) {
+ // Intentionally ignore invokes here -- since the catchswitch is
+ // marked "unwind to caller", it would be a verifier error if it
+ // contained an invoke which unwinds out of it, so any invoke we'd
+ // encounter must unwind to some child of the catch.
+ if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
+ continue;
+
+ Instruction *ChildPad = cast<Instruction>(Child);
+ auto Memo = MemoMap.find(ChildPad);
+ if (Memo == MemoMap.end()) {
+ // Haven't figured out this child pad yet; queue it.
+ Worklist.push_back(ChildPad);
+ continue;
+ }
+ // We've already checked this child, but might have found that
+ // it offers no proof either way.
+ Value *ChildUnwindDestToken = Memo->second;
+ if (!ChildUnwindDestToken)
+ continue;
+ // We already know the child's unwind dest, which can either
+ // be ConstantTokenNone to indicate unwind to caller, or can
+ // be another child of the catchpad. Only the former indicates
+ // the unwind dest of the catchswitch.
+ if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
+ UnwindDestToken = ChildUnwindDestToken;
+ break;
+ }
+ assert(getParentPad(ChildUnwindDestToken) == CatchPad);
+ }
+ }
+ }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
+ for (User *U : CleanupPad->users()) {
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+ if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
+ UnwindDestToken = RetUnwindDest->getFirstNonPHI();
+ else
+ UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
+ break;
+ }
+ Value *ChildUnwindDestToken;
+ if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+ ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
+ } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
+ Instruction *ChildPad = cast<Instruction>(U);
+ auto Memo = MemoMap.find(ChildPad);
+ if (Memo == MemoMap.end()) {
+ // Haven't resolved this child yet; queue it and keep searching.
+ Worklist.push_back(ChildPad);
+ continue;
+ }
+ // We've checked this child, but still need to ignore it if it
+ // had no proof either way.
+ ChildUnwindDestToken = Memo->second;
+ if (!ChildUnwindDestToken)
+ continue;
+ } else {
+ // Not a relevant user of the cleanuppad
+ continue;
+ }
+ // In a well-formed program, the child/invoke must either unwind to
+ // an(other) child of the cleanup, or exit the cleanup. In the
+ // first case, continue searching.
+ if (isa<Instruction>(ChildUnwindDestToken) &&
+ getParentPad(ChildUnwindDestToken) == CleanupPad)
+ continue;
+ UnwindDestToken = ChildUnwindDestToken;
+ break;
+ }
+ }
+ // If we haven't found an unwind dest for CurrentPad, we may have queued its
+ // children, so move on to the next in the worklist.
+ if (!UnwindDestToken)
+ continue;
+
+ // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
+ // any ancestors of CurrentPad up to but not including UnwindDestToken's
+ // parent pad. Record this in the memo map, and check to see if the
+ // original EHPad being queried is one of the ones exited.
+ Value *UnwindParent;
+ if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
+ UnwindParent = getParentPad(UnwindPad);
+ else
+ UnwindParent = nullptr;
+ bool ExitedOriginalPad = false;
+ for (Instruction *ExitedPad = CurrentPad;
+ ExitedPad && ExitedPad != UnwindParent;
+ ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
+ // Skip over catchpads since they just follow their catchswitches.
+ if (isa<CatchPadInst>(ExitedPad))
+ continue;
+ MemoMap[ExitedPad] = UnwindDestToken;
+ ExitedOriginalPad |= (ExitedPad == EHPad);
+ }
+
+ if (ExitedOriginalPad)
+ return UnwindDestToken;
+
+ // Continue the search.
+ }
+
+ // No definitive information is contained within this funclet.
+ return nullptr;
+}
+
+/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
+/// return that pad instruction. If it unwinds to caller, return
+/// ConstantTokenNone. If it does not have a definitive unwind destination,
+/// return nullptr.
+///
+/// This routine gets invoked for calls in funclets in inlinees when inlining
+/// an invoke. Since many funclets don't have calls inside them, it's queried
+/// on-demand rather than building a map of pads to unwind dests up front.
+/// Determining a funclet's unwind dest may require recursively searching its
+/// descendants, and also ancestors and cousins if the descendants don't provide
+/// an answer. Since most funclets will have their unwind dest immediately
+/// available as the unwind dest of a catchswitch or cleanupret, this routine
+/// searches top-down from the given pad and then up. To avoid worst-case
+/// quadratic run-time given that approach, it uses a memo map to avoid
+/// re-processing funclet trees. The callers that rewrite the IR as they go
+/// take advantage of this, for correctness, by checking/forcing rewritten
+/// pads' entries to match the original callee view.
+static Value *getUnwindDestToken(Instruction *EHPad,
+ UnwindDestMemoTy &MemoMap) {
+ // Catchpads unwind to the same place as their catchswitch;
+  // redirect any queries on catchpads so the code below can
+ // deal with just catchswitches and cleanuppads.
+ if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
+ EHPad = CPI->getCatchSwitch();
+
+ // Check if we've already determined the unwind dest for this pad.
+ auto Memo = MemoMap.find(EHPad);
+ if (Memo != MemoMap.end())
+ return Memo->second;
+
+ // Search EHPad and, if necessary, its descendants.
+ Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
+ assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
+ if (UnwindDestToken)
+ return UnwindDestToken;
+
+ // No information is available for this EHPad from itself or any of its
+  // descendants. An unwind all the way out to a pad in the caller would
+  // also need to agree with the unwind dest of the parent funclet, so
+ // search up the chain to try to find a funclet with information. Put
+ // null entries in the memo map to avoid re-processing as we go up.
+ MemoMap[EHPad] = nullptr;
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 4> TempMemos;
+ TempMemos.insert(EHPad);
+#endif
+ Instruction *LastUselessPad = EHPad;
+ Value *AncestorToken;
+ for (AncestorToken = getParentPad(EHPad);
+ auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
+ AncestorToken = getParentPad(AncestorToken)) {
+ // Skip over catchpads since they just follow their catchswitches.
+ if (isa<CatchPadInst>(AncestorPad))
+ continue;
+ // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
+ // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
+ // call to getUnwindDestToken, that would mean that AncestorPad had no
+ // information in itself, its descendants, or its ancestors. If that
+ // were the case, then we should also have recorded the lack of information
+ // for the descendant that we're coming from. So assert that we don't
+ // find a null entry in the MemoMap for AncestorPad.
+ assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
+ auto AncestorMemo = MemoMap.find(AncestorPad);
+ if (AncestorMemo == MemoMap.end()) {
+ UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
+ } else {
+ UnwindDestToken = AncestorMemo->second;
+ }
+ if (UnwindDestToken)
+ break;
+ LastUselessPad = AncestorPad;
+ MemoMap[LastUselessPad] = nullptr;
+#ifndef NDEBUG
+ TempMemos.insert(LastUselessPad);
+#endif
+ }
+
+ // We know that getUnwindDestTokenHelper was called on LastUselessPad and
+ // returned nullptr (and likewise for EHPad and any of its ancestors up to
+ // LastUselessPad), so LastUselessPad has no information from below. Since
+ // getUnwindDestTokenHelper must investigate all downward paths through
+ // no-information nodes to prove that a node has no information like this,
+ // and since any time it finds information it records it in the MemoMap for
+ // not just the immediately-containing funclet but also any ancestors also
+ // exited, it must be the case that, walking downward from LastUselessPad,
+ // visiting just those nodes which have not been mapped to an unwind dest
+ // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
+ // they are just used to keep getUnwindDestTokenHelper from repeating work),
+ // any node visited must have been exhaustively searched with no information
+ // for it found.
+ SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
+ while (!Worklist.empty()) {
+ Instruction *UselessPad = Worklist.pop_back_val();
+ auto Memo = MemoMap.find(UselessPad);
+ if (Memo != MemoMap.end() && Memo->second) {
+ // Here the name 'UselessPad' is a bit of a misnomer, because we've found
+ // that it is a funclet that does have information about unwinding to
+ // a particular destination; its parent was a useless pad.
+ // Since its parent has no information, the unwind edge must not escape
+ // the parent, and must target a sibling of this pad. This local unwind
+ // gives us no information about EHPad. Leave it and the subtree rooted
+ // at it alone.
+ assert(getParentPad(Memo->second) == getParentPad(UselessPad));
+ continue;
+ }
+    // We know we don't have information for UselessPad. If it has an entry in
+ // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
+ // added on this invocation of getUnwindDestToken; if a previous invocation
+ // recorded nullptr, it would have had to prove that the ancestors of
+ // UselessPad, which include LastUselessPad, had no information, and that
+ // in turn would have required proving that the descendants of
+    // LastUselessPad, which include EHPad, have no information about
+ // LastUselessPad, which would imply that EHPad was mapped to nullptr in
+ // the MemoMap on that invocation, which isn't the case if we got here.
+ assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
+ // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
+ // information that we'd be contradicting by making a map entry for it
+ // (which is something that getUnwindDestTokenHelper must have proved for
+    // us to get here). Just assert on its direct users here; the checks in
+ // this downward walk at its descendants will verify that they don't have
+ // any unwind edges that exit 'UselessPad' either (i.e. they either have no
+ // unwind edges or unwind to a sibling).
+ MemoMap[UselessPad] = UnwindDestToken;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
+ assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
+ for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
+ auto *CatchPad = HandlerBlock->getFirstNonPHI();
+ for (User *U : CatchPad->users()) {
+ assert(
+ (!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ CatchPad)) &&
+ "Expected useless pad");
+ if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+ Worklist.push_back(cast<Instruction>(U));
+ }
+ }
+ } else {
+ assert(isa<CleanupPadInst>(UselessPad));
+ for (User *U : UselessPad->users()) {
+ assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
+ assert((!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ UselessPad)) &&
+ "Expected useless pad");
+ if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
+ Worklist.push_back(cast<Instruction>(U));
+ }
+ }
+ }
+
+ return UnwindDestToken;
+}
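+
+// For example, given a cleanuppad %outer whose only interesting user is a
+// nested cleanuppad %inner that ends in "cleanupret from %inner unwind to
+// caller", the helper above records ConstantTokenNone for both pads while
+// answering the first query, so a later query on %outer is satisfied
+// directly from the memo map.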
+
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+/// This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that unwind to UnwindEdge, so that the
+/// caller can fill in the PHI nodes in that block for the new edge.
+static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
+ BasicBlock *BB, BasicBlock *UnwindEdge,
+ UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *I = &*BBI++;
+
+ // We only need to check for function calls: inlined invoke
+ // instructions require no special handling.
+ CallInst *CI = dyn_cast<CallInst>(I);
+
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
+ continue;
+
+ // We do not need to (and in fact, cannot) convert possibly throwing calls
+    // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
+ // invokes. The caller's "segment" of the deoptimization continuation
+    // attached to the newly inlined @llvm.experimental.deoptimize
+ // (resp. @llvm.experimental.guard) call should contain the exception
+ // handling logic, if any.
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
+ F->getIntrinsicID() == Intrinsic::experimental_guard)
+ continue;
+
+ if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
+ // This call is nested inside a funclet. If that funclet has an unwind
+ // destination within the inlinee, then unwinding out of this call would
+ // be UB. Rewriting this call to an invoke which targets the inlined
+ // invoke's unwind dest would give the call's parent funclet multiple
+ // unwind destinations, which is something that subsequent EH table
+      // generation can't handle and that the verifier rejects. So when we
+ // see such a call, leave it as a call.
+ auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
+ Value *UnwindDestToken =
+ getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
+ if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+ continue;
+#ifndef NDEBUG
+ Instruction *MemoKey;
+ if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ MemoKey = CatchPad->getCatchSwitch();
+ else
+ MemoKey = FuncletPad;
+ assert(FuncletUnwindMap->count(MemoKey) &&
+ (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
+ "must get memoized to avoid confusing later searches");
+#endif // NDEBUG
+ }
+
+ changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
+ return BB;
+ }
+ return nullptr;
+}
+
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *InvokeDest = II->getUnwindDest();
+
+ Function *Caller = FirstNewBlock->getParent();
+
+ // The inlined code is currently at the end of the function, scan from the
+ // start of the inlined code to its end, checking for stuff we need to
+ // rewrite.
+ LandingPadInliningInfo Invoke(II);
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
+ I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
+ for (LandingPadInst *InlinedLPad : InlinedLPads) {
+ unsigned OuterNum = OuterLPad->getNumClauses();
+ InlinedLPad->reserveClauses(OuterNum);
+ for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ if (OuterLPad->isCleanup())
+ InlinedLPad->setCleanup(true);
+ }
+
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (InlinedCodeInfo.ContainsCalls)
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, Invoke.getOuterResumeDest()))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ Invoke.addIncomingPHIValuesFor(NewBB);
+
+ // Forward any resumes that are remaining here.
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
+ Invoke.forwardResume(RI, InlinedLPads);
+ }
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ InvokeDest->removePredecessor(II->getParent());
+}
+
+/// If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes.
+///
+/// II is the invoke instruction being inlined. FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
+ ClonedCodeInfo &InlinedCodeInfo) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Function *Caller = FirstNewBlock->getParent();
+
+ assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
+
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing the
+ // edge from this block.
+ SmallVector<Value *, 8> UnwindDestPHIValues;
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ for (Instruction &I : *UnwindDest) {
+ // Save the value to use for this edge.
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ // Add incoming-PHI values to the unwind destination block for the given basic
+ // block, using the values for the original invoke's source block.
+ auto UpdatePHINodes = [&](BasicBlock *Src) {
+ BasicBlock::iterator I = UnwindDest->begin();
+ for (Value *V : UnwindDestPHIValues) {
+ PHINode *PHI = cast<PHINode>(I);
+ PHI->addIncoming(V, Src);
+ ++I;
+ }
+ };
+
+ // This connects all the instructions which 'unwind to caller' to the invoke
+ // destination.
+ UnwindDestMemoTy FuncletUnwindMap;
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
+ BB != E; ++BB) {
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (CRI->unwindsToCaller()) {
+ auto *CleanupPad = CRI->getCleanupPad();
+ CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
+ CRI->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ // Finding a cleanupret with an unwind destination would confuse
+ // subsequent calls to getUnwindDestToken, so map the cleanuppad
+ // to short-circuit any such calls and recognize this as an "unwind
+ // to caller" cleanup.
+ assert(!FuncletUnwindMap.count(CleanupPad) ||
+ isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
+ FuncletUnwindMap[CleanupPad] =
+ ConstantTokenNone::get(Caller->getContext());
+ }
+ }
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ Instruction *Replacement = nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (CatchSwitch->unwindsToCaller()) {
+ Value *UnwindDestToken;
+ if (auto *ParentPad =
+ dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
+ // This catchswitch is nested inside another funclet. If that
+ // funclet has an unwind destination within the inlinee, then
+ // unwinding out of this catchswitch would be UB. Rewriting this
+ // catchswitch to unwind to the inlined invoke's unwind dest would
+ // give the parent funclet multiple unwind destinations, which is
+ // something that subsequent EH table generation can't handle and
+          // that the verifier rejects. So when we see such a catchswitch,
+          // leave it as "unwind to caller".
+ UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
+ if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+ continue;
+ } else {
+ // This catchswitch has no parent to inherit constraints from, and
+ // none of its descendants can have an unwind edge that exits it and
+ // targets another funclet in the inlinee. It may or may not have a
+ // descendant that definitively has an unwind to caller. In either
+ // case, we'll have to assume that any unwinds out of it may need to
+ // be routed to the caller, so treat it as though it has a definitive
+ // unwind to caller.
+ UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
+ }
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), UnwindDest,
+ CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
+ CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+ // Propagate info for the old catchswitch over to the new one in
+ // the unwind map. This also serves to short-circuit any subsequent
+ // checks for the unwind dest of this catchswitch, which would get
+ // confused if they found the outer handler in the callee.
+ FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
+ Replacement = NewCatchSwitch;
+ }
+ } else if (!isa<FuncletPadInst>(I)) {
+ llvm_unreachable("unexpected EHPad!");
+ }
+
+ if (Replacement) {
+ Replacement->takeName(I);
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ UpdatePHINodes(&*BB);
+ }
+ }
+
+ if (InlinedCodeInfo.ContainsCalls)
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB)
+ if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+ &*BB, UnwindDest, &FuncletUnwindMap))
+ // Update any PHI nodes in the exceptional block to indicate that there
+ // is now a new entry in them.
+ UpdatePHINodes(NewBB);
+
+ // Now that everything is happy, we have one final detail. The PHI nodes in
+ // the exception destination block still have entries due to the original
+ // invoke instruction. Eliminate these entries (which might even delete the
+ // PHI node) now.
+ UnwindDest->removePredecessor(InvokeBB);
+}
+
+/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
+/// that metadata should be propagated to all memory-accessing cloned
+/// instructions.
+static void PropagateParallelLoopAccessMetadata(CallSite CS,
+ ValueToValueMapTy &VMap) {
+ MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ if (!M)
+ return;
+
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ M = MDNode::concatenate(PM, M);
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+}
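+
+// For example, if the call site carries !llvm.mem.parallel_loop_access !0
+// and a cloned load already carries !1, the load ends up with the
+// concatenation of !1 and !0; cloned memory accesses that lack the metadata
+// are tagged with !0 directly.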
+
+/// When inlining a function that contains noalias scope metadata,
+/// this metadata needs to be cloned so that the inlined blocks
+/// have different "unique scopes" at every call site. Were this not done, then
+/// aliasing scopes from a function inlined into a caller multiple times could
+/// not be differentiated (and this would lead to miscompiles because the
+/// non-aliasing property communicated by the metadata could have
+/// call-site-specific control dependencies).
+static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
+ const Function *CalledFunc = CS.getCalledFunction();
+ SetVector<const MDNode *> MD;
+
+ // Note: We could only clone the metadata if it is already used in the
+ // caller. I'm omitting that check here because it might confuse
+ // inter-procedural alias analysis passes. We can revisit this if it becomes
+ // an efficiency or overhead problem.
+
+ for (const BasicBlock &I : *CalledFunc)
+ for (const Instruction &J : I) {
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
+ MD.insert(M);
+ if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
+ MD.insert(M);
+ }
+
+ if (MD.empty())
+ return;
+
+ // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
+ // the set.
+ SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
+ while (!Queue.empty()) {
+ const MDNode *M = cast<MDNode>(Queue.pop_back_val());
+ for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
+ if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
+ if (MD.insert(M1))
+ Queue.push_back(M1);
+ }
+
+ // Now we have a complete set of all metadata in the chains used to specify
+ // the noalias scopes and the lists of those scopes.
+ SmallVector<TempMDTuple, 16> DummyNodes;
+ DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
+ for (const MDNode *I : MD) {
+ DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ MDMap[I].reset(DummyNodes.back().get());
+ }
+
+ // Create new metadata nodes to replace the dummy nodes, replacing old
+ // metadata references with either a dummy node or an already-created new
+ // node.
+ for (const MDNode *I : MD) {
+ SmallVector<Metadata *, 4> NewOps;
+ for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
+ const Metadata *V = I->getOperand(i);
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ NewOps.push_back(MDMap[M]);
+ else
+ NewOps.push_back(const_cast<Metadata *>(V));
+ }
+
+ MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
+ MDTuple *TempM = cast<MDTuple>(MDMap[I]);
+ assert(TempM->isTemporary() && "Expected temporary node");
+
+ TempM->replaceAllUsesWith(NewM);
+ }
+
+ // Now replace the metadata in the new inlined instructions with the
+  // replacements from the map.
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
+ MDNode *NewMD = MDMap[M];
+ // If the call site also had alias scope metadata (a list of scopes to
+ // which instructions inside it might belong), propagate those scopes to
+ // the inlined instructions.
+ if (MDNode *CSM =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ NewMD = MDNode::concatenate(NewMD, CSM);
+ NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
+ } else if (NI->mayReadOrWriteMemory()) {
+ if (MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ NI->setMetadata(LLVMContext::MD_alias_scope, M);
+ }
+
+ if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
+ MDNode *NewMD = MDMap[M];
+ // If the call site also had noalias metadata (a list of scopes with
+ // which instructions inside it don't alias), propagate those scopes to
+ // the inlined instructions.
+ if (MDNode *CSM =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ NewMD = MDNode::concatenate(NewMD, CSM);
+ NI->setMetadata(LLVMContext::MD_noalias, NewMD);
+ } else if (NI->mayReadOrWriteMemory()) {
+ if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ NI->setMetadata(LLVMContext::MD_noalias, M);
+ }
+ }
+}
+
+/// If the inlined function has noalias arguments,
+/// then add new alias scopes for each noalias argument, tag the mapped noalias
+/// parameters with noalias metadata specifying the new scope, and tag all
+/// non-derived loads, stores and memory intrinsics with the new alias scopes.
+static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
+ const DataLayout &DL, AAResults *CalleeAAR) {
+ if (!EnableNoAliasConversion)
+ return;
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ SmallVector<const Argument *, 4> NoAliasArgs;
+
+ for (const Argument &Arg : CalledFunc->args())
+ if (Arg.hasNoAliasAttr() && !Arg.use_empty())
+ NoAliasArgs.push_back(&Arg);
+
+ if (NoAliasArgs.empty())
+ return;
+
+ // To do a good job, if a noalias variable is captured, we need to know if
+ // the capture point dominates the particular use we're considering.
+ DominatorTree DT;
+ DT.recalculate(const_cast<Function&>(*CalledFunc));
+
+ // noalias indicates that pointer values based on the argument do not alias
+ // pointer values which are not based on it. So we add a new "scope" for each
+ // noalias function argument. Accesses using pointers based on that argument
+ // become part of that alias scope, accesses using pointers not based on that
+ // argument are tagged as noalias with that scope.
+
+ DenseMap<const Argument *, MDNode *> NewScopes;
+ MDBuilder MDB(CalledFunc->getContext());
+
+ // Create a new scope domain for this function.
+ MDNode *NewDomain =
+ MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
+ for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
+ const Argument *A = NoAliasArgs[i];
+
+ std::string Name = CalledFunc->getName();
+ if (A->hasName()) {
+ Name += ": %";
+ Name += A->getName();
+ } else {
+ Name += ": argument ";
+ Name += utostr(i);
+ }
+
+ // Note: We always create a new anonymous root here. This is true regardless
+ // of the linkage of the callee because the aliasing "scope" is not just a
+ // property of the callee, but also all control dependencies in the caller.
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+ NewScopes.insert(std::make_pair(A, NewScope));
+ }
+
+ // Iterate over all new instructions in the map; for all memory-access
+ // instructions, add the alias scope metadata.
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ bool IsArgMemOnlyCall = false, IsFuncCall = false;
+ SmallVector<const Value *, 2> PtrArgs;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ PtrArgs.push_back(LI->getPointerOperand());
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ PtrArgs.push_back(SI->getPointerOperand());
+ else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ PtrArgs.push_back(VAAI->getPointerOperand());
+ else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ PtrArgs.push_back(CXI->getPointerOperand());
+ else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
+ PtrArgs.push_back(RMWI->getPointerOperand());
+ else if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ // If we know that the call does not access memory, then we'll still
+ // know that about the inlined clone of this call site, and we don't
+ // need to add metadata.
+ if (ICS.doesNotAccessMemory())
+ continue;
+
+ IsFuncCall = true;
+ if (CalleeAAR) {
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ if (MRB == FMRB_OnlyAccessesArgumentPointees ||
+ MRB == FMRB_OnlyReadsArgumentPointees)
+ IsArgMemOnlyCall = true;
+ }
+
+ for (Value *Arg : ICS.args()) {
+ // We need to check the underlying objects of all arguments, not just
+ // the pointer arguments, because we might be passing pointers as
+ // integers, etc.
+ // However, if we know that the call only accesses pointer arguments,
+ // then we only need to check the pointer arguments.
+ if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
+ continue;
+
+ PtrArgs.push_back(Arg);
+ }
+ }
+
+ // If we found no pointers, then this instruction is not suitable for
+ // pairing with an instruction to receive aliasing metadata.
+      // However, if this is a call, we might simply not alias with any of the
+      // noalias arguments.
+ if (PtrArgs.empty() && !IsFuncCall)
+ continue;
+
+ // It is possible that there is only one underlying object, but you
+      // need to go through several PHIs to see it, and thus it could be
+ // repeated in the Objects list.
+ SmallPtrSet<const Value *, 4> ObjSet;
+ SmallVector<Metadata *, 4> Scopes, NoAliases;
+
+ SmallSetVector<const Argument *, 4> NAPtrArgs;
+ for (const Value *V : PtrArgs) {
+ SmallVector<Value *, 4> Objects;
+ GetUnderlyingObjects(const_cast<Value*>(V),
+ Objects, DL, /* LI = */ nullptr);
+
+ for (Value *O : Objects)
+ ObjSet.insert(O);
+ }
+
+ // Figure out if we're derived from anything that is not a noalias
+ // argument.
+ bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
+ for (const Value *V : ObjSet) {
+        // Is this value a constant that cannot be derived from any pointer
+        // value? (We need to exclude constant expressions, for example, that
+        // are formed from arithmetic on global symbols.)
+ bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
+ isa<ConstantPointerNull>(V) ||
+ isa<ConstantDataVector>(V) || isa<UndefValue>(V);
+ if (IsNonPtrConst)
+ continue;
+
+ // If this is anything other than a noalias argument, then we cannot
+ // completely describe the aliasing properties using alias.scope
+ // metadata (and, thus, won't add any).
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (!A->hasNoAliasAttr())
+ UsesAliasingPtr = true;
+ } else {
+ UsesAliasingPtr = true;
+ }
+
+ // If this is not some identified function-local object (which cannot
+ // directly alias a noalias argument), or some other argument (which,
+ // by definition, also cannot alias a noalias argument), then we could
+        // alias a noalias argument that has been captured.
+ if (!isa<Argument>(V) &&
+ !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
+ CanDeriveViaCapture = true;
+ }
+
+ // A function call can always get captured noalias pointers (via other
+ // parameters, globals, etc.).
+ if (IsFuncCall && !IsArgMemOnlyCall)
+ CanDeriveViaCapture = true;
+
+ // First, we want to figure out all of the sets with which we definitely
+      // don't alias. Iterate over all noalias sets, and add those for which:
+ // 1. The noalias argument is not in the set of objects from which we
+ // definitely derive.
+ // 2. The noalias argument has not yet been captured.
+ // An arbitrary function that might load pointers could see captured
+ // noalias arguments via other noalias arguments or globals, and so we
+ // must always check for prior capture.
+ for (const Argument *A : NoAliasArgs) {
+ if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
+ // It might be tempting to skip the
+ // PointerMayBeCapturedBefore check if
+ // A->hasNoCaptureAttr() is true, but this is
+ // incorrect because nocapture only guarantees
+ // that no copies outlive the function, not
+ // that the value cannot be locally captured.
+ !PointerMayBeCapturedBefore(A,
+ /* ReturnCaptures */ false,
+ /* StoreCaptures */ false, I, &DT)))
+ NoAliases.push_back(NewScopes[A]);
+ }
+
+ if (!NoAliases.empty())
+ NI->setMetadata(LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_noalias),
+ MDNode::get(CalledFunc->getContext(), NoAliases)));
+
+ // Next, we want to figure out all of the sets to which we might belong.
+ // We might belong to a set if the noalias argument is in the set of
+ // underlying objects. If there is some non-noalias argument in our list
+ // of underlying objects, then we cannot add a scope because the fact
+ // that some access does not alias with any set of our noalias arguments
+ // cannot itself guarantee that it does not alias with this access
+ // (because there is some pointer of unknown origin involved and the
+ // other access might also depend on this pointer). We also cannot add
+ // scopes to arbitrary functions unless we know they don't access any
+ // non-parameter pointer-values.
+ bool CanAddScopes = !UsesAliasingPtr;
+ if (CanAddScopes && IsFuncCall)
+ CanAddScopes = IsArgMemOnlyCall;
+
+ if (CanAddScopes)
+ for (const Argument *A : NoAliasArgs) {
+ if (ObjSet.count(A))
+ Scopes.push_back(NewScopes[A]);
+ }
+
+ if (!Scopes.empty())
+ NI->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(CalledFunc->getContext(), Scopes)));
+ }
+ }
+}
+
+/// If the inlined function has pointer arguments with alignment attributes
+/// (excluding byval arguments), add @llvm.assume-based alignment assumptions
+/// to preserve this information.
+static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
+ if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
+ return;
+
+ AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
+ auto &DL = CS.getCaller()->getParent()->getDataLayout();
+
+ // To avoid inserting redundant assumptions, we should check for assumptions
+ // already in the caller. To do this, we might need a DT of the caller.
+ DominatorTree DT;
+ bool DTCalculated = false;
+
+ Function *CalledFunc = CS.getCalledFunction();
+ for (Argument &Arg : CalledFunc->args()) {
+ unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
+ if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
+ if (!DTCalculated) {
+ DT.recalculate(*CS.getCaller());
+ DTCalculated = true;
+ }
+
+ // If we can already prove the asserted alignment in the context of the
+ // caller, then don't bother inserting the assumption.
+ Value *ArgVal = CS.getArgument(Arg.getArgNo());
+ if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
+ continue;
+
+ CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, ArgVal, Align);
+ AC->registerAssumption(NewAsmp);
+ }
+ }
+}
+
+/// Once we have cloned code over from a callee into the caller,
+/// update the specified callgraph to reflect the changes we made.
+/// Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph may remain.
+static void UpdateCallGraphAfterInlining(CallSite CS,
+ Function::iterator FirstNewBlock,
+ ValueToValueMapTy &VMap,
+ InlineFunctionInfo &IFI) {
+ CallGraph &CG = *IFI.CG;
+ const Function *Caller = CS.getCaller();
+ const Function *Callee = CS.getCalledFunction();
+ CallGraphNode *CalleeNode = CG[Callee];
+ CallGraphNode *CallerNode = CG[Caller];
+
+ // Since we inlined some uninlined call sites in the callee into the caller,
+ // add edges from the caller to all of the callees of the callee.
+ CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
+
+ // Consider the case where CalleeNode == CallerNode.
+ CallGraphNode::CalledFunctionsVector CallCache;
+ if (CalleeNode == CallerNode) {
+ CallCache.assign(I, E);
+ I = CallCache.begin();
+ E = CallCache.end();
+ }
+
+ for (; I != E; ++I) {
+ const Value *OrigCall = I->first;
+
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
+ // Only copy the edge if the call was inlined!
+ if (VMI == VMap.end() || VMI->second == nullptr)
+ continue;
+
+ // If the call was inlined, but then constant folded, there is no edge to
+ // add. Check for this case.
+ Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ if (!NewCall)
+ continue;
+
+ // We do not treat intrinsic calls like real function calls because we
+ // expect them to become inline code; do not add an edge for an intrinsic.
+ CallSite CS = CallSite(NewCall);
+ if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ continue;
+
+ // Remember that this call site got inlined for the client of
+ // InlineFunction.
+ IFI.InlinedCalls.push_back(NewCall);
+
+ // It's possible that inlining the callsite will cause it to go from an
+ // indirect to a direct call by resolving a function pointer. If this
+ // happens, set the callee of the new call site to a more precise
+ // destination. This can also happen if the call graph node of the caller
+ // was just unnecessarily imprecise.
+ if (!I->second->getFunction())
+ if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ // Indirect call site resolved to direct call.
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
+ continue;
+ }
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ }
+
+ // Update the call graph by deleting the edge from Callee to Caller. We must
+ // do this after the loop above in case Caller and Callee are the same.
+ CallerNode->removeCallEdgeFor(CS);
+}
+
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+ BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+ IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
+
+ Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
+}
+
+/// When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ PointerType *ArgTy = cast<PointerType>(Arg->getType());
+ Type *AggTy = ArgTy->getElementType();
+
+ Function *Caller = TheCall->getFunction();
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
+ ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ // Create the alloca. If we have DataLayout, use nice alignment.
+ unsigned Align = DL.getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
+// Check whether this Value is used by a lifetime intrinsic.
+static bool isUsedByLifetimeMarker(Value *V) {
+ for (User *U : V->users()) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// Check whether the given alloca already has
+// lifetime.start or lifetime.end intrinsics.
+static bool hasLifetimeMarkers(AllocaInst *AI) {
+ Type *Ty = AI->getType();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+ Ty->getPointerAddressSpace());
+ if (Ty == Int8PtrTy)
+ return isUsedByLifetimeMarker(AI);
+
+ // Do a scan to find all the casts to i8*.
+ for (User *U : AI->users()) {
+ if (U->getType() != Int8PtrTy) continue;
+ if (U->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(U))
+ return true;
+ }
+ return false;
+}
+
+/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
+/// block. Allocas used in inalloca calls and allocas of dynamic array size
+/// cannot be static.
+static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
+ return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
+}
+
+/// Update inlined instructions' line numbers to encode the location where
+/// these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall, bool CalleeHasDebugInfo) {
+ const DebugLoc &TheCallDL = TheCall->getDebugLoc();
+ if (!TheCallDL)
+ return;
+
+ auto &Ctx = Fn->getContext();
+ DILocation *InlinedAtNode = TheCallDL;
+
+ // Create a unique call site, not to be confused with any other call from the
+ // same location.
+ InlinedAtNode = DILocation::getDistinct(
+ Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
+ InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
+
+  // Cache the inlined-at nodes as they're built so they are reused; without
+  // this, every instruction's inlined-at chain would be distinct from the
+  // others.
+ DenseMap<const MDNode *, MDNode *> IANodes;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (DebugLoc DL = BI->getDebugLoc()) {
+ auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
+ IANodes);
+ auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+ BI->setDebugLoc(IDL);
+ continue;
+ }
+
+ if (CalleeHasDebugInfo)
+ continue;
+
+ // If the inlined instruction has no line number, make it look as if it
+ // originates from the call location. This is important for
+ // ((__always_inline__, __nodebug__)) functions which must use caller
+ // location for all instructions in their function body.
+
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(BI))
+ if (allocaWouldBeStaticInEntry(AI))
+ continue;
+
+ BI->setDebugLoc(TheCallDL);
+ }
+ }
+}
+
+/// Update the block frequencies of the caller after a callee has been inlined.
+///
+/// Each block cloned into the caller has its block frequency scaled by the
+/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
+/// callee's entry block gets the same frequency as the callsite block and the
+/// relative frequencies of all cloned blocks remain the same after cloning.
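+/// For example, if the callsite block has frequency 8 and the callee's entry
+/// block has frequency 2, each cloned block's frequency is scaled by 4.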
+static void updateCallerBFI(BasicBlock *CallSiteBlock,
+ const ValueToValueMapTy &VMap,
+ BlockFrequencyInfo *CallerBFI,
+ BlockFrequencyInfo *CalleeBFI,
+ const BasicBlock &CalleeEntryBlock) {
+ SmallPtrSet<BasicBlock *, 16> ClonedBBs;
+ for (auto const &Entry : VMap) {
+ if (!isa<BasicBlock>(Entry.first) || !Entry.second)
+ continue;
+ auto *OrigBB = cast<BasicBlock>(Entry.first);
+ auto *ClonedBB = cast<BasicBlock>(Entry.second);
+ uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ if (!ClonedBBs.insert(ClonedBB).second) {
+ // Multiple blocks in the callee might get mapped to one cloned block in
+ // the caller since we prune the callee as we clone it. When that happens,
+ // we want to use the maximum among the original blocks' frequencies.
+ uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ if (NewFreq > Freq)
+ Freq = NewFreq;
+ }
+ CallerBFI->setBlockFreq(ClonedBB, Freq);
+ }
+ BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
+ CallerBFI->setBlockFreqAndScale(
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
+ ClonedBBs);
+}
+
+/// Update the branch metadata for cloned call instructions.
+static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
+ const Optional<uint64_t> &CalleeEntryCount,
+ const Instruction *TheCall,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *CallerBFI) {
+ if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
+ return;
+ Optional<uint64_t> CallSiteCount =
+ PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
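+  // Clamp the inlined call count to the callee's entry count, since the
+  // callsite count is itself an estimate and may exceed it.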
+ uint64_t CallCount =
+ std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
+ CalleeEntryCount.getValue());
+
+ for (auto const &Entry : VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(CallCount, CalleeEntryCount.getValue());
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (VMap.count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
+ CalleeEntryCount.getValue());
+}
+
+/// Update the entry count of callee after inlining.
+///
+/// The callsite's block count is subtracted from the callee's function entry
+/// count.
+static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
+ Instruction *CallInst, Function *Callee,
+ ProfileSummaryInfo *PSI) {
+  // If the callee has an original count of N, and the estimated count of
+ // callsite is M, the new callee count is set to N - M. M is estimated from
+ // the caller's entry count, its entry block frequency and the block frequency
+ // of the callsite.
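+  // For example, with an original callee count of 100 and an estimated
+  // callsite count of 30, the callee's entry count becomes 70.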
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount.hasValue() || !PSI)
+ return;
+ Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI);
+ if (!CallCount.hasValue())
+ return;
+ // Since CallSiteCount is an estimate, it could exceed the original callee
+ // count and has to be set to 0.
+ if (CallCount.getValue() > CalleeCount.getValue())
+ Callee->setEntryCount(0);
+ else
+ Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
+}
+
+/// This function inlines the called function into the basic block of the
+/// caller. This returns false if it is not possible to inline this call.
+/// The program is still in a well-defined state if this occurs, though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR, bool InsertLifetime) {
+ Instruction *TheCall = CS.getInstruction();
+ assert(TheCall->getParent() && TheCall->getFunction()
+ && "Instruction not in function!");
+
+ // If IFI has any state in it, zap it before we fill it in.
+ IFI.reset();
+
+ Function *CalledFunc = CS.getCalledFunction();
+ if (!CalledFunc || // Can't inline external function or indirect
+ CalledFunc->isDeclaration() || // call, or call to a vararg function!
+ CalledFunc->getFunctionType()->isVarArg()) return false;
+
+ // The inliner does not know how to inline through calls with operand bundles
+ // in general ...
+ if (CS.hasOperandBundles()) {
+ for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+ uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
+ // ... but it knows how to inline through "deopt" operand bundles ...
+ if (Tag == LLVMContext::OB_deopt)
+ continue;
+ // ... and "funclet" operand bundles.
+ if (Tag == LLVMContext::OB_funclet)
+ continue;
+
+ return false;
+ }
+ }
+
+ // If the call to the callee cannot throw, set the 'nounwind' flag on any
+ // calls that we inline.
+ bool MarkNoUnwind = CS.doesNotThrow();
+
+ BasicBlock *OrigBB = TheCall->getParent();
+ Function *Caller = OrigBB->getParent();
+
+ // GC poses two hazards to inlining, which only occur when the callee has GC:
+ // 1. If the caller has no GC, then the callee's GC must be propagated to the
+ // caller.
+ // 2. If the caller has a differing GC, it is invalid to inline.
+ if (CalledFunc->hasGC()) {
+ if (!Caller->hasGC())
+ Caller->setGC(CalledFunc->getGC());
+ else if (CalledFunc->getGC() != Caller->getGC())
+ return false;
+ }
+
+ // Get the personality function from the callee if it contains a landing pad.
+ Constant *CalledPersonality =
+ CalledFunc->hasPersonalityFn()
+ ? CalledFunc->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
+
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ Constant *CallerPersonality =
+ Caller->hasPersonalityFn()
+ ? Caller->getPersonalityFn()->stripPointerCasts()
+ : nullptr;
+ if (CalledPersonality) {
+ if (!CallerPersonality)
+ Caller->setPersonalityFn(CalledPersonality);
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ else if (CalledPersonality != CallerPersonality)
+ return false;
+ }
+
+ // We need to figure out which funclet the callsite was in so that we may
+ // properly nest the callee.
+ Instruction *CallSiteEHPad = nullptr;
+ if (CallerPersonality) {
+ EHPersonality Personality = classifyEHPersonality(CallerPersonality);
+ if (isFuncletEHPersonality(Personality)) {
+ Optional<OperandBundleUse> ParentFunclet =
+ CS.getOperandBundle(LLVMContext::OB_funclet);
+ if (ParentFunclet)
+ CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
+
+ // OK, the inlining site is legal. What about the target function?
+
+ if (CallSiteEHPad) {
+ if (Personality == EHPersonality::MSVC_CXX) {
+ // The MSVC personality cannot tolerate catches getting inlined into
+ // cleanup funclets.
+ if (isa<CleanupPadInst>(CallSiteEHPad)) {
+ // Ok, the call site is within a cleanuppad. Let's check the callee
+ // for catchpads.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
+ return false;
+ }
+ }
+ } else if (isAsynchronousEHPersonality(Personality)) {
+          // SEH is even less tolerant; there may not be any sort of
+          // exceptional funclet in the callee.
+ for (const BasicBlock &CalledBB : *CalledFunc) {
+ if (CalledBB.isEHPad())
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Determine if we are dealing with a call in an EHPad which does not unwind
+ // to caller.
+ bool EHPadForCallUnwindsLocally = false;
+ if (CallSiteEHPad && CS.isCall()) {
+ UnwindDestMemoTy FuncletUnwindMap;
+ Value *CallSiteUnwindDestToken =
+ getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
+
+ EHPadForCallUnwindsLocally =
+ CallSiteUnwindDestToken &&
+ !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
+ }
+
+ // Get an iterator to the last basic block in the function, which will have
+ // the new function inlined after it.
+ Function::iterator LastBlock = --Caller->end();
+
+ // Make sure to capture all of the return instructions from the cloned
+ // function.
+ SmallVector<ReturnInst*, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+ Function::iterator FirstNewBlock;
+
+ { // Scope to destroy VMap after cloning.
+ ValueToValueMapTy VMap;
+ // Keep a list of pair (dst, src) to emit byval initializations.
+ SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+
+ auto &DL = Caller->getParent()->getDataLayout();
+
+ assert(CalledFunc->arg_size() == CS.arg_size() &&
+ "No varargs calls can be inlined!");
+
+ // Calculate the vector of arguments to pass into the function cloner, which
+ // matches up the formal to the actual argument values.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
+ Value *ActualArg = *AI;
+
+      // When byval arguments are actually inlined, we need to make the copy
+      // implied by them explicit. However, we don't do this if the callee is
+      // readonly or readnone, because the copy would be unneeded: the callee
+      // doesn't modify the struct.
+ if (CS.isByValArgument(ArgNo)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo));
+ if (ActualArg != *AI)
+ ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
+ }
+
+ VMap[&*I] = ActualArg;
+ }
+
+ // Add alignment assumptions if necessary. We do this before the inlined
+ // instructions are actually cloned into the caller so that we can easily
+ // check what will be known at the start of the inlined code.
+ AddAlignmentAssumptions(CS, IFI);
+
+ // We want the inliner to prune the code as it copies. We would LOVE to
+ // have no dead or constant instructions leftover after inlining occurs
+ // (which can happen, e.g., because an argument was constant), but we'll be
+ // happy with whatever the cloner can do.
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
+ &InlinedFunctionInfo, TheCall);
+ // Remember the first block that is newly cloned over.
+ FirstNewBlock = LastBlock; ++FirstNewBlock;
+
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
+ IFI.PSI, IFI.CallerBFI);
+ // Update the profile count of callee.
+ updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
+
+ // Inject byval arguments initialization.
+ for (std::pair<Value*, Value*> &Init : ByValInit)
+ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ &*FirstNewBlock, IFI);
+
+ Optional<OperandBundleUse> ParentDeopt =
+ CS.getOperandBundle(LLVMContext::OB_deopt);
+ if (ParentDeopt) {
+ SmallVector<OperandBundleDef, 2> OpDefs;
+
+ for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
+ Instruction *I = dyn_cast_or_null<Instruction>(VH);
+ if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
+
+ OpDefs.clear();
+
+ CallSite ICS(I);
+ OpDefs.reserve(ICS.getNumOperandBundles());
+
+ for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
+ auto ChildOB = ICS.getOperandBundleAt(i);
+ if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
+ // If the inlined call has other operand bundles, let them be
+ OpDefs.emplace_back(ChildOB);
+ continue;
+ }
+
+ // It may be useful to separate this logic (of handling operand
+ // bundles) out to a separate "policy" component if this gets crowded.
+ // Prepend the parent's deoptimization continuation to the newly
+ // inlined call's deoptimization continuation.
+ std::vector<Value *> MergedDeoptArgs;
+ MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
+ ChildOB.Inputs.size());
+
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(),
+ ParentDeopt->Inputs.begin(),
+ ParentDeopt->Inputs.end());
+ MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
+ ChildOB.Inputs.end());
+
+ OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
+ }
+
+ Instruction *NewI = nullptr;
+ if (isa<CallInst>(I))
+ NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else
+ NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
+
+ // Note: the RAUW does the appropriate fixup in VMap, so we need to do
+ // this even if the call returns void.
+ I->replaceAllUsesWith(NewI);
+
+ VH = nullptr;
+ I->eraseFromParent();
+ }
+ }
+
+ // Update the callgraph if requested.
+ if (IFI.CG)
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // For 'nodebug' functions, the associated DISubprogram is always null.
+ // Conservatively avoid propagating the callsite debug location to
+ // instructions inlined from a function whose DISubprogram is not null.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall,
+ CalledFunc->getSubprogram() != nullptr);
+
+ // Clone existing noalias metadata if necessary.
+ CloneAliasScopeMetadata(CS, VMap);
+
+ // Add noalias metadata if necessary.
+ AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
+
+ // Propagate llvm.mem.parallel_loop_access if necessary.
+ PropagateParallelLoopAccessMetadata(CS, VMap);
+
+ // Register any cloned assumptions.
+ if (IFI.GetAssumptionCache)
+ for (BasicBlock &NewBlock :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBlock) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II);
+ }
+ }
+
+ // If there are any alloca instructions in the block that used to be the entry
+ // block for the callee, move them to the entry block of the caller. First
+ // calculate which instruction they should be inserted before. We insert the
+ // instructions at the end of the current alloca list.
+ {
+ BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+ for (BasicBlock::iterator I = FirstNewBlock->begin(),
+ E = FirstNewBlock->end(); I != E; ) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+ if (!AI) continue;
+
+ // If the alloca is now dead, remove it. This often occurs due to code
+ // specialization.
+ if (AI->use_empty()) {
+ AI->eraseFromParent();
+ continue;
+ }
+
+ if (!allocaWouldBeStaticInEntry(AI))
+ continue;
+
+ // Keep track of the static allocas that we inline into the caller.
+ IFI.StaticAllocas.push_back(AI);
+
+ // Scan for the block of allocas that we can move over, and move them
+ // all at once.
+ while (isa<AllocaInst>(I) &&
+ allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
+ ++I;
+ }
+
+ // Transfer all of the allocas over in a block. Using splice means
+ // that the instructions aren't removed from the symbol table, then
+ // reinserted.
+ Caller->getEntryBlock().getInstList().splice(
+ InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
+ }
+ // Move any dbg.declares describing the allocas into the entry basic block.
+ DIBuilder DIB(*Caller->getParent());
+ for (auto &AI : IFI.StaticAllocas)
+ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
+ }
+
+ bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
+ if (InlinedFunctionInfo.ContainsCalls) {
+ CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+ if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+ CallSiteTailKind = CI->getTailCallKind();
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+ ++BB) {
+ for (Instruction &I : *BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ if (Function *F = CI->getCalledFunction())
+ InlinedDeoptimizeCalls |=
+ F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
+
+ // We need to reduce the strength of any inlined tail calls. For
+ // musttail, we have to avoid introducing potential unbounded stack
+ // growth. For example, if functions 'f' and 'g' are mutually recursive
+ // with musttail, we can inline 'g' into 'f' so long as we preserve
+ // musttail on the cloned call to 'f'. If either the inlined call site
+ // or the cloned call site is *not* musttail, the program already has
+ // one frame of stack growth, so it's safe to remove musttail. Here is
+ // a table of example transformations:
+ //
+ // f -> musttail g -> musttail f ==> f -> musttail f
+ // f -> musttail g -> tail f ==> f -> tail f
+ // f -> g -> musttail f ==> f -> f
+ // f -> g -> tail f ==> f -> f
+ CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ CI->setTailCallKind(ChildTCK);
+ InlinedMustTailCalls |= CI->isMustTailCall();
+
+ // Calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+ }
+
+  // Leave lifetime markers for the static allocas, scoping them to the
+ // function we just inlined.
+ if (InsertLifetime && !IFI.StaticAllocas.empty()) {
+ IRBuilder<> builder(&FirstNewBlock->front());
+ for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
+ AllocaInst *AI = IFI.StaticAllocas[ai];
+ // Don't mark swifterror allocas. They can't have bitcast uses.
+ if (AI->isSwiftError())
+ continue;
+
+ // If the alloca is already scoped to something smaller than the whole
+ // function then there's no need to add redundant, less accurate markers.
+ if (hasLifetimeMarkers(AI))
+ continue;
+
+ // Try to determine the size of the allocation.
+ ConstantInt *AllocaSize = nullptr;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ auto &DL = Caller->getParent()->getDataLayout();
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+
+ // Don't add markers for zero-sized allocas.
+ if (AllocaArraySize == 0)
+ continue;
+
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
+ // call and a return. The return kills all local allocas.
+ if (InlinedMustTailCalls &&
+ RI->getParent()->getTerminatingMustTailCall())
+ continue;
+ if (InlinedDeoptimizeCalls &&
+ RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
+ IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
+ }
+ }
+ }
+
+ // If the inlined code contained dynamic alloca instructions, wrap the inlined
+ // code with llvm.stacksave/llvm.stackrestore intrinsics.
+ if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+ Module *M = Caller->getParent();
+ // Get the two intrinsics we care about.
+ Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
+    Function *StackRestore =
+        Intrinsic::getDeclaration(M, Intrinsic::stackrestore);
+
+ // Insert the llvm.stacksave.
+ CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, {}, "savedstack");
+
+ // Insert a call to llvm.stackrestore before any return instructions in the
+ // inlined function.
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.stackrestore calls between a musttail or deoptimize
+ // call and a return. The return will restore the stack pointer.
+ if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
+ continue;
+ if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
+ continue;
+ IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
+ }
+ }
+
+ // If we are inlining for an invoke instruction, we must make sure to rewrite
+ // any call instructions into invoke instructions. This is sensitive to which
+ // funclet pads were top-level in the inlinee, so must be done before
+ // rewriting the "parent pad" links.
+ if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+ BasicBlock *UnwindDest = II->getUnwindDest();
+ Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+ if (isa<LandingPadInst>(FirstNonPHI)) {
+ HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ } else {
+ HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+ }
+ }
+
+ // Update the lexical scopes of the new funclets and callsites.
+ // Anything that had 'none' as its parent is now nested inside the callsite's
+ // EHPad.
+
+ if (CallSiteEHPad) {
+ for (Function::iterator BB = FirstNewBlock->getIterator(),
+ E = Caller->end();
+ BB != E; ++BB) {
+ // Add bundle operands to any top-level call sites.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
+ Instruction *I = &*BBI++;
+ CallSite CS(I);
+ if (!CS)
+ continue;
+
+ // Skip call sites which are nounwind intrinsics.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+ continue;
+
+ // Skip call sites which already have a "funclet" bundle.
+ if (CS.getOperandBundle(LLVMContext::OB_funclet))
+ continue;
+
+ CS.getOperandBundlesAsDefs(OpBundles);
+ OpBundles.emplace_back("funclet", CallSiteEHPad);
+
+ Instruction *NewInst;
+ if (CS.isCall())
+ NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+ else
+ NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
+ NewInst->takeName(I);
+ I->replaceAllUsesWith(NewInst);
+ I->eraseFromParent();
+
+ OpBundles.clear();
+ }
+
+      // It is problematic if the inlinee has a cleanupret that unwinds to the
+      // caller and we inline it into a call site that doesn't unwind but into
+      // an EH pad that does. Such an edge must be dynamically unreachable.
+      // As such, we replace the cleanupret with unreachable.
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
+ if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
+ changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+
+ Instruction *I = BB->getFirstNonPHI();
+ if (!I->isEHPad())
+ continue;
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
+ if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
+ CatchSwitch->setParentPad(CallSiteEHPad);
+ } else {
+ auto *FPI = cast<FuncletPadInst>(I);
+ if (isa<ConstantTokenNone>(FPI->getParentPad()))
+ FPI->setParentPad(CallSiteEHPad);
+ }
+ }
+ }
+
+ if (InlinedDeoptimizeCalls) {
+ // We need to at least remove the deoptimizing returns from the Return set,
+ // so that the control flow from those returns does not get merged into the
+ // caller (but terminate it instead). If the caller's return type does not
+ // match the callee's return type, we also need to change the return type of
+ // the intrinsic.
+ if (Caller->getReturnType() == TheCall->getType()) {
+ auto NewEnd = remove_if(Returns, [](ReturnInst *RI) {
+ return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
+ });
+ Returns.erase(NewEnd, Returns.end());
+ } else {
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
+ Caller->getParent(), Intrinsic::experimental_deoptimize,
+ {Caller->getReturnType()});
+
+ for (ReturnInst *RI : Returns) {
+ CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
+ if (!DeoptCall) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+
+ // The calling convention on the deoptimize call itself may be bogus,
+ // since the code we're inlining may have undefined behavior (and may
+ // never actually execute at runtime); but all
+ // @llvm.experimental.deoptimize declarations have to have the same
+ // calling convention in a well-formed module.
+ auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
+ NewDeoptIntrinsic->setCallingConv(CallingConv);
+ auto *CurBB = RI->getParent();
+ RI->eraseFromParent();
+
+ SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
+ DeoptCall->arg_end());
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ DeoptCall->getOperandBundlesAsDefs(OpBundles);
+ DeoptCall->eraseFromParent();
+ assert(!OpBundles.empty() &&
+ "Expected at least the deopt operand bundle");
+
+ IRBuilder<> Builder(CurBB);
+ CallInst *NewDeoptCall =
+ Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
+ NewDeoptCall->setCallingConv(CallingConv);
+ if (NewDeoptCall->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(NewDeoptCall);
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+ }
+
+ // Handle any inlined musttail call sites. In order for a new call site to be
+ // musttail, the source of the clone and the inlined call site must have been
+ // musttail. Therefore it's safe to return without merging control into the
+ // phi below.
+ if (InlinedMustTailCalls) {
+ // Check if we need to bitcast the result of any musttail calls.
+ Type *NewRetTy = Caller->getReturnType();
+ bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+
+ // Handle the returns preceded by musttail calls separately.
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ for (ReturnInst *RI : Returns) {
+ CallInst *ReturnedMustTail =
+ RI->getParent()->getTerminatingMustTailCall();
+ if (!ReturnedMustTail) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+ if (!NeedBitCast)
+ continue;
+
+ // Delete the old return and any preceding bitcast.
+ BasicBlock *CurBB = RI->getParent();
+ auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+ RI->eraseFromParent();
+ if (OldCast)
+ OldCast->eraseFromParent();
+
+ // Insert a new bitcast and return with the right type.
+ IRBuilder<> Builder(CurBB);
+ Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+
+ // Now that all of the transforms on the inlined code have taken place but
+ // before we splice the inlined code into the CFG and lose track of which
+ // blocks were actually inlined, collect the call sites. We only do this if
+ // call graph updates weren't requested, as those provide value handle based
+ // tracking of inlined call sites instead.
+ if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
+ // Otherwise just collect the raw call sites that were inlined.
+ for (BasicBlock &NewBB :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBB)
+ if (auto CS = CallSite(&I))
+ IFI.InlinedCallSites.push_back(CS);
+ }
+
+ // If we cloned in _exactly one_ basic block, and if that block ends in a
+ // return instruction, we splice the body of the inlined callee directly into
+ // the calling basic block.
+ if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+ // Move all of the instructions right before the call.
+ OrigBB->getInstList().splice(TheCall->getIterator(),
+ FirstNewBlock->getInstList(),
+ FirstNewBlock->begin(), FirstNewBlock->end());
+ // Remove the cloned basic block.
+ Caller->getBasicBlockList().pop_back();
+
+ // If the call site was an invoke instruction, add a branch to the normal
+ // destination.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
+
+ // If the return instruction returned a value, replace uses of the call with
+ // uses of the returned value.
+ if (!TheCall->use_empty()) {
+ ReturnInst *R = Returns[0];
+ if (TheCall == R->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(R->getReturnValue());
+ }
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // Since we are now done with the return instruction, delete it also.
+ Returns[0]->eraseFromParent();
+
+ // We are now done with the inlining.
+ return true;
+ }
+
+  // Otherwise, we have the normal case of more than one block to inline or
+ // multiple return sites.
+
+ // We want to clone the entire callee function into the hole between the
+ // "starter" and "ender" blocks. How we accomplish this depends on whether
+ // this is an invoke instruction or a call instruction.
+ BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = nullptr;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+ // Add an unconditional branch to make this look like the CallInst case...
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
+
+ // Split the basic block. This guarantees that no PHI nodes will have to be
+ // updated due to new incoming edges, and make the invoke case more
+ // symmetric to the call case.
+ AfterCallBB =
+ OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
+ CalledFunc->getName() + ".exit");
+
+ } else { // It's a call
+ // If this is a call instruction, we need to split the basic block that
+ // the call lives in.
+ //
+ AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
+ CalledFunc->getName() + ".exit");
+ }
+
+ if (IFI.CallerBFI) {
+ // Copy original BB's block frequency to AfterCallBB
+ IFI.CallerBFI->setBlockFreq(
+ AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+
+ // Change the branch that used to go to AfterCallBB to branch to the first
+ // basic block of the inlined function.
+ //
+ TerminatorInst *Br = OrigBB->getTerminator();
+ assert(Br && Br->getOpcode() == Instruction::Br &&
+ "splitBasicBlock broken!");
+ Br->setOperand(0, &*FirstNewBlock);
+
+ // Now that the function is correct, make it a little bit nicer. In
+ // particular, move the basic blocks inserted from the end of the function
+ // into the space made by splitting the source basic block.
+ Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
+ Caller->getBasicBlockList(), FirstNewBlock,
+ Caller->end());
+
+ // Handle all of the return instructions that we just cloned in, and eliminate
+ // any users of the original call/invoke instruction.
+ Type *RTy = CalledFunc->getReturnType();
+
+ PHINode *PHI = nullptr;
+ if (Returns.size() > 1) {
+ // The PHI node should go at the front of the new basic block to merge all
+ // possible incoming values.
+ if (!TheCall->use_empty()) {
+ PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
+ &AfterCallBB->front());
+ // Anything that used the result of the function call should now use the
+ // PHI node as their operand.
+ TheCall->replaceAllUsesWith(PHI);
+ }
+
+ // Loop over all of the return instructions adding entries to the PHI node
+ // as appropriate.
+ if (PHI) {
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ assert(RI->getReturnValue()->getType() == PHI->getType() &&
+ "Ret value not consistent in function!");
+ PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+ }
+ }
+
+ // Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+ ReturnInst *RI = Returns[i];
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
+ RI->eraseFromParent();
+ }
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
+ } else if (!Returns.empty()) {
+ // Otherwise, if there is exactly one return value, just replace anything
+ // using the return value of the call with the computed value.
+ if (!TheCall->use_empty()) {
+ if (TheCall == Returns[0]->getReturnValue())
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ else
+ TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+ }
+
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+ // Splice the code from the return block into the block that it will return
+ // to, which contains the code that was after the call.
+ AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+ ReturnBB->getInstList());
+
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
+ // Delete the return instruction now and empty ReturnBB now.
+ Returns[0]->eraseFromParent();
+ ReturnBB->eraseFromParent();
+ } else if (!TheCall->use_empty()) {
+ // No returns, but something is using the return value of the call. Just
+ // nuke the result.
+ TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+ }
+
+ // Since we are now done with the Call/Invoke, we can delete it.
+ TheCall->eraseFromParent();
+
+ // If we inlined any musttail calls and the original return is now
+ // unreachable, delete it. It can only contain a bitcast and ret.
+ if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ AfterCallBB->eraseFromParent();
+
+ // We should always be able to fold the entry block of the function into the
+ // single predecessor of the block...
+ assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+ BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+ // Splice the code entry block into calling block, right before the
+ // unconditional branch.
+ CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());
+
+ // Remove the unconditional branch.
+ OrigBB->getInstList().erase(Br);
+
+ // Now we can remove the CalleeEntry block, which is now empty.
+ Caller->getBasicBlockList().erase(CalleeEntry);
+
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI) {
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
+ auto &DL = Caller->getParent()->getDataLayout();
+ if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
new file mode 100644
index 000000000000..23ec45edb3ef
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -0,0 +1,63 @@
+//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that gives instructions names; this is mostly
+// useful when diffing the effect of an optimization because deleting an
+// unnamed instruction can change all other instruction numbering, making the
+// diff very noisy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+namespace {
+ struct InstNamer : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
+ Info.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override {
+ for (auto &Arg : F.args())
+ if (!Arg.hasName())
+ Arg.setName("arg");
+
+ for (BasicBlock &BB : F) {
+ if (!BB.hasName())
+ BB.setName("bb");
+
+ for (Instruction &I : BB)
+ if (!I.hasName() && !I.getType()->isVoidTy())
+ I.setName("tmp");
+ }
+ return true;
+ }
+ };
+
+ char InstNamer::ID = 0;
+}
+
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *llvm::createInstructionNamerPass() {
+ return new InstNamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 000000000000..5a90dcb033b2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,674 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32-bit and 64-bit scalar integer
+// division for targets that don't have native support. It's largely derived
+// from compiler-rt's implementations of __udivsi3 and __udivmoddi4,
+// but hand-tuned for targets that prefer less control flow.
+//
+//===----------------------------------------------------------------------===//
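+//
+// The entry points for this code are declared in IntegerDivision.h
+// (expandDivision, expandRemainder, and their fixed-bit-width variants);
+// each rewrites a single div/rem instruction in place using the expansion
+// helpers below.
+//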
+
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "integer-division"
+
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the urem (if present, i.e. not folded), ready to
+/// be expanded if the user wishes.
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
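+  // For example, for srem(-7, 3): %u_dividend = 7, %u_divisor = 3,
+  // %urem = 1, and %srem = -1 (the remainder takes the dividend's sign).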
+ Value *DividendSign = Builder.CreateAShr(Dividend, Shift);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, Shift);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // Following instructions are generated for both i32 and i64
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
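+  // For example, for urem(7, 3): %quotient = 2, %product = 6, and
+  // %remainder = 1.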
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Implementation taken from compiler-rt's __divsi3 and __divdi3
+
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
+
+ // ; %tmp = ashr i32 %dividend, 31
+ // ; %tmp1 = ashr i32 %divisor, 31
+ // ; %tmp2 = xor i32 %tmp, %dividend
+ // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
+ // ; %tmp3 = xor i32 %tmp1, %divisor
+ // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
+ // ; %q_sgn = xor i32 %tmp1, %tmp
+ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
+ // ; %tmp4 = xor i32 %q_mag, %q_sgn
+ // ; %q = sub i32 %tmp4, %q_sgn
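+  // For example, for sdiv(-7, 3): %u_dvnd = 7, %u_dvsr = 3, %q_mag = 2,
+  // %q_sgn = -1, and %q = (2 ^ -1) - (-1) = -2, i.e. truncation toward zero.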
+ Value *Tmp = Builder.CreateAShr(Dividend, Shift);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, Shift);
+ Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
+ Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+ Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
+ Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+ Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
+ Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+ Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
+ Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+ Builder.SetInsertPoint(UDiv);
+
+ return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers.
+/// Returns the quotient, rounded towards 0. Builder's insert point should
+/// point where the caller wants code generated, e.g. at the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+  // implementation of __udivsi3.c. Here, we do a lower-level IR-based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // Some helper values
+ IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
+ unsigned BitWidth = DivTy->getBitWidth();
+
+ ConstantInt *Zero;
+ ConstantInt *One;
+ ConstantInt *NegOne;
+ ConstantInt *MSB;
+
+ if (BitWidth == 64) {
+ Zero = Builder.getInt64(0);
+ One = Builder.getInt64(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Zero = Builder.getInt32(0);
+ One = Builder.getInt32(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt32(31);
+ }
+
+ ConstantInt *True = Builder.getTrue();
+
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ DivTy);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+  // The same instructions are generated for both i32 (msb 31) and i64 (msb 63).
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = or i1 %ret0_3, %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = or i1 %ret0, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
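+  //
+  // Added note: %sr = ctlz(%divisor) - ctlz(%dividend) is negative exactly
+  // when the divisor's leading one bit is above the dividend's, i.e. when
+  // %divisor > %dividend, so the unsigned compare against MSB catches that
+  // case and returns 0. %sr == MSB can only happen when %divisor is 1 and
+  // the dividend's top bit is set, in which case the dividend itself is the
+  // quotient.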
+ Builder.SetInsertPoint(SpecialCases);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True});
+ Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
+ Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(MSB, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
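+  //
+  // In essence, each iteration performs one step of classic shift-and-
+  // subtract (restoring) division: the (remainder, quotient) pair is shifted
+  // left by one bit, and %tmp10 is a branchless all-ones/all-zeros mask that
+  // is -1 exactly when the shifted remainder %tmp7 is >= %divisor, in which
+  // case the divisor is subtracted and a 1 is carried into the quotient.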
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *SR_3 = Builder.CreatePHI(DivTy, 2);
+ PHINode *R_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_2 = Builder.CreatePHI(DivTy, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, MSB);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_3 = Builder.CreatePHI(DivTy, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(DivTy, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
+
+/// Generate code to calculate the remainder of two integers, replacing Rem
+/// with the generated code. This currently generates code using the udiv
+/// expansion, but future work includes generating more specialized code,
+/// e.g. when more information about the operands is known. Implements both
+/// 32-bit and 64-bit scalar remainder computation.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder instruction");
+
+ IRBuilder<> Builder(Rem);
+
+  assert(!Rem->getType()->isVectorTy() && "Rem over vectors not supported");
+  assert((Rem->getType()->getIntegerBitWidth() == 32 ||
+          Rem->getType()->getIntegerBitWidth() == 64) &&
+         "Rem of bitwidth other than 32 or 64 not supported");
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ // Check whether this is the insert point while Rem is still valid.
+ bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint();
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+    // If we didn't actually generate a urem instruction, we're done. This
+    // happens, for example, if the inputs were constant; in that case the
+    // Builder's insertion point was left unchanged.
+ if (IsInsertPoint)
+ return true;
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+  if (BinaryOperator *UDiv =
+          dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands is known. Implements both
+/// 32-bit and 64-bit scalar division.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division instruction");
+
+ IRBuilder<> Builder(Div);
+
+ assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
+ assert((Div->getType()->getIntegerBitWidth() == 32 ||
+ Div->getType()->getIntegerBitWidth() == 64) &&
+ "Div of bitwidth other than 32 or 64 not supported");
+
+ // First prepare the sign if it's a signed division
+ if (Div->getOpcode() == Instruction::SDiv) {
+ // Lower the code to unsigned division, and reset Div to point to the udiv.
+ Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1), Builder);
+
+ // Check whether this is the insert point while Div is still valid.
+ bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint();
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+    // If we didn't actually generate a udiv instruction, we're done. This
+    // happens, for example, if the inputs were constant; in that case the
+    // Builder's insertion point was left unchanged.
+ if (IsInsertPoint)
+ return true;
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ Div = BO;
+ }
+
+ // Insert the unsigned division code
+ Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1),
+ Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return true;
+}
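+
+// Illustrative usage sketch (hypothetical caller, not part of this file): a
+// target without a hardware divider might first collect the div instructions,
+// since expansion erases them, and then expand each one:
+//
+//   SmallVector<BinaryOperator *, 8> Divs;
+//   for (BasicBlock &BB : F)
+//     for (Instruction &I : BB)
+//       if (I.getOpcode() == Instruction::UDiv ||
+//           I.getOpcode() == Instruction::SDiv)
+//         Divs.push_back(cast<BinaryOperator>(&I));
+//   for (BinaryOperator *BO : Divs)
+//     expandDivision(BO); // also expands the udiv produced for an sdiv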
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for
+/// targets that have little or no support for integer arithmetic narrower
+/// than 32 bits.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder instruction");
+
+  Type *RemTy = Rem->getType();
+  assert(!RemTy->isVectorTy() && "Rem over vectors not supported");
+
+  unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+  assert(RemTyBitWidth <= 32 &&
+         "Rem of bitwidth greater than 32 not supported");
+
+ if (RemTyBitWidth == 32)
+ return expandRemainder(Rem);
+
+  // If the bitwidth is smaller than 32, extend the inputs, compute the
+  // remainder in 32 bits, and truncate the result.
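+  // For example (illustrative), an i16 srem is rewritten as:
+  //   %lhs32 = sext i16 %a to i32
+  //   %rhs32 = sext i16 %b to i32
+  //   %rem32 = srem i32 %lhs32, %rhs32  ; expanded by expandRemainder below
+  //   %res = trunc i32 %rem32 to i16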
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 64 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 64 bits.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
+  assert((Rem->getOpcode() == Instruction::SRem ||
+          Rem->getOpcode() == Instruction::URem) &&
+         "Trying to expand remainder from a non-remainder instruction");
+
+  Type *RemTy = Rem->getType();
+  assert(!RemTy->isVectorTy() && "Rem over vectors not supported");
+
+  unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+  assert(RemTyBitWidth <= 64 &&
+         "Rem of bitwidth greater than 64 not supported");
+
+ if (RemTyBitWidth == 64)
+ return expandRemainder(Rem);
+
+  // If the bitwidth is smaller than 64, extend the inputs, compute the
+  // remainder in 64 bits, and truncate the result.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have little
+/// or no support for integer arithmetic narrower than 32 bits.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division instruction");
+
+ Type *DivTy = Div->getType();
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported");
+
+ if (DivTyBitWidth == 32)
+ return expandDivision(Div);
+
+  // If the bitwidth is smaller than 32, extend the inputs, divide in 32 bits,
+  // and truncate the result.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
+
+/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 64 bits.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division instruction");
+
+ Type *DivTy = Div->getType();
+ assert(!DivTy->isVectorTy() && "Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ assert(DivTyBitWidth <= 64 &&
+ "Div of bitwidth greater than 64 not supported");
+
+ if (DivTyBitWidth == 64)
+ return expandDivision(Div);
+
+  // If the bitwidth is smaller than 64, extend the inputs, divide in 64 bits,
+  // and truncate the result.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 000000000000..089f2b5f3b18
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,438 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary. For example, it turns
+// the left into the right code:
+//
+// for (...) for (...)
+// if (c) if (c)
+// X1 = ... X1 = ...
+// else else
+// X2 = ... X2 = ...
+// X3 = phi(X1, X2) X3 = phi(X1, X2)
+// ... = X3 + 4 X4 = phi(X3)
+// ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine. The major benefit of this
+// transformation is that it makes many other loop optimizations, such as
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "lcssa"
+
+STATISTIC(NumLCSSA, "Number of values live out of a loop");
+
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyLoopLCSSA = true;
+#else
+static bool VerifyLoopLCSSA = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
+ cl::desc("Verify loop lcssa form (time consuming)"));
+
+/// Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+ return is_contained(ExitBlocks, BB);
+}
+
+/// For every instruction from the worklist, check to see if it has any uses
+/// that are outside the current loop. If so, insert LCSSA PHI nodes and
+/// rewrite the uses.
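+///
+/// For instance (illustrative): a value %v defined inside a loop and used in
+/// a block outside of it gets a PHI in the loop's exit block,
+///   %v.lcssa = phi i32 [ %v, %loop.latch ]
+/// and the outside use is rewritten to refer to %v.lcssa instead of %v.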
+bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
+ DominatorTree &DT, LoopInfo &LI) {
+ SmallVector<Use *, 16> UsesToRewrite;
+ SmallSetVector<PHINode *, 16> PHIsToRemove;
+ PredIteratorCache PredCache;
+ bool Changed = false;
+
+  // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
+  // instructions within the same loops; computing the exit blocks is
+  // expensive, and we're not mutating the loop structure.
+ SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
+
+ while (!Worklist.empty()) {
+ UsesToRewrite.clear();
+
+ Instruction *I = Worklist.pop_back_val();
+ assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
+ BasicBlock *InstBB = I->getParent();
+ Loop *L = LI.getLoopFor(InstBB);
+ assert(L && "Instruction belongs to a BB that's not part of a loop");
+ if (!LoopExitBlocks.count(L))
+ L->getExitBlocks(LoopExitBlocks[L]);
+ assert(LoopExitBlocks.count(L));
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
+
+ if (ExitBlocks.empty())
+ continue;
+
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (auto *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(U);
+
+ if (InstBB != UserBB && !L->contains(UserBB))
+ UsesToRewrite.push_back(&U);
+ }
+
+    // If there are no uses outside the loop, go on to the next instruction.
+ if (UsesToRewrite.empty())
+ continue;
+
+ ++NumLCSSA; // We are applying the transformation
+
+ // Invoke instructions are special in that their result value is not
+ // available along their unwind edge. The code below tests to see whether
+ // DomBB dominates the value, so adjust DomBB to the normal destination
+ // block, which is effectively where the value is first usable.
+ BasicBlock *DomBB = InstBB;
+ if (auto *Inv = dyn_cast<InvokeInst>(I))
+ DomBB = Inv->getNormalDest();
+
+ DomTreeNode *DomNode = DT.getNode(DomBB);
+
+ SmallVector<PHINode *, 16> AddedPHIs;
+ SmallVector<PHINode *, 8> PostProcessPHIs;
+
+ SmallVector<PHINode *, 4> InsertedPHIs;
+ SSAUpdater SSAUpdate(&InsertedPHIs);
+ SSAUpdate.Initialize(I->getType(), I->getName());
+
+    // Insert the LCSSA PHIs into all of the exit blocks dominated by the
+    // value, and add them to the PHIs map.
+ for (BasicBlock *ExitBB : ExitBlocks) {
+ if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+ continue;
+
+ // If we already inserted something for this BB, don't reprocess it.
+ if (SSAUpdate.HasValueForBlock(ExitBB))
+ continue;
+
+ PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa", &ExitBB->front());
+
+ // Add inputs from inside the loop for this PHI.
+ for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+ PN->addIncoming(I, Pred);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!L->contains(Pred))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+ PN->getNumIncomingValues() - 1)));
+ }
+
+ AddedPHIs.push_back(PN);
+
+ // Remember that this phi makes the value alive in this block.
+ SSAUpdate.AddAvailableValue(ExitBB, PN);
+
+ // LoopSimplify might fail to simplify some loops (e.g. when indirect
+ // branches are involved). In such situations, it might happen that an
+ // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
+ // create PHIs in such an exit block, we are also inserting PHIs into L2's
+ // header. This could break LCSSA form for L2 because these inserted PHIs
+      // can also have uses outside of L2. Remember all PHIs in such
+      // situations so we can revisit them later on. FIXME: Remove this once
+      // indirectbr support in LoopSimplify is improved.
+ if (auto *OtherLoop = LI.getLoopFor(ExitBB))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(PN);
+ }
+
+ // Rewrite all uses outside the loop in terms of the new PHIs we just
+ // inserted.
+ for (Use *UseToRewrite : UsesToRewrite) {
+ // If this use is in an exit block, rewrite to use the newly inserted PHI.
+ // This is required for correctness because SSAUpdate doesn't handle uses
+ // in the same block. It assumes the PHI we inserted is at the end of the
+ // block.
+ Instruction *User = cast<Instruction>(UseToRewrite->getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (auto *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(*UseToRewrite);
+
+ if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
+ UseToRewrite->set(&UserBB->front());
+ continue;
+ }
+
+ // Otherwise, do full PHI insertion.
+ SSAUpdate.RewriteUse(*UseToRewrite);
+ }
+
+ // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
+ // to post-process them to keep LCSSA form.
+ for (PHINode *InsertedPN : InsertedPHIs) {
+ if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(InsertedPN);
+ }
+
+ // Post process PHI instructions that were inserted into another disjoint
+ // loop and update their exits properly.
+ for (auto *PostProcessPN : PostProcessPHIs)
+ if (!PostProcessPN->use_empty())
+ Worklist.push_back(PostProcessPN);
+
+ // Keep track of PHI nodes that we want to remove because they did not have
+ // any uses rewritten.
+ for (PHINode *PN : AddedPHIs)
+ if (PN->use_empty())
+ PHIsToRemove.insert(PN);
+
+ Changed = true;
+ }
+ // Remove PHI nodes that did not have any uses rewritten.
+ for (PHINode *PN : PHIsToRemove) {
+ assert (PN->use_empty() && "Trying to remove a phi with uses.");
+ PN->eraseFromParent();
+ }
+ return Changed;
+}
+
+// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
+static void computeBlocksDominatingExits(
+ Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+ SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
+ SmallVector<BasicBlock *, 8> BBWorklist;
+
+ // We start from the exit blocks, as every block trivially dominates itself
+ // (not strictly).
+ for (BasicBlock *BB : ExitBlocks)
+ BBWorklist.push_back(BB);
+
+ while (!BBWorklist.empty()) {
+ BasicBlock *BB = BBWorklist.pop_back_val();
+
+ // Check if this is a loop header. If this is the case, we're done.
+ if (L.getHeader() == BB)
+ continue;
+
+ // Otherwise, add its immediate predecessor in the dominator tree to the
+ // worklist, unless we visited it already.
+ BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
+
+    // Exit blocks can have an immediate dominator not belonging to the
+    // loop. If an exit block is immediately dominated by a block outside
+    // the loop, then not all paths from that dominator to the exit block go
+    // through the loop.
+ // Example:
+ //
+ // |---- A
+ // | |
+ // | B<--
+ // | | |
+ // |---> C --
+ // |
+ // D
+ //
+ // C is the exit block of the loop and it's immediately dominated by A,
+ // which doesn't belong to the loop.
+ if (!L.contains(IDomBB))
+ continue;
+
+ if (BlocksDominatingExits.insert(IDomBB))
+ BBWorklist.push_back(IDomBB);
+ }
+}
+
+bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ if (ExitBlocks.empty())
+ return false;
+
+ SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
+
+  // We want to avoid use-scanning by leveraging dominance information: if a
+  // block doesn't dominate any of the loop exits, then none of the values
+  // defined in the loop can be used outside of it. We compute the set of
+  // blocks fulfilling this condition in advance by walking the dominator
+  // tree upwards until we hit a loop header.
+ computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
+
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, put them into the worklist to rewrite those uses.
+ for (BasicBlock *BB : BlocksDominatingExits) {
+ for (Instruction &I : *BB) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+      // and instructions with a single use in the same block that is not a
+      // PHI node.
+ if (I.use_empty() ||
+ (I.hasOneUse() && I.user_back()->getParent() == BB &&
+ !isa<PHINode>(I.user_back())))
+ continue;
+
+ // Tokens cannot be used in PHI nodes, so we skip over them.
+ // We can run into tokens which are live out of a loop with catchswitch
+ // instructions in Windows EH if the catchswitch has one catchpad which
+ // is inside the loop and another which is not.
+ if (I.getType()->isTokenTy())
+ continue;
+
+ Worklist.push_back(&I);
+ }
+ }
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI);
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && Changed)
+ SE->forgetLoop(&L);
+
+ assert(L.isLCSSAForm(DT));
+
+ return Changed;
+}
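+
+// Illustrative usage (hypothetical caller): a transform that may have created
+// uses of loop values outside of the loop can restore LCSSA afterwards with:
+//
+//   if (!L->isLCSSAForm(DT))
+//     formLCSSA(*L, DT, &LI, SE);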
+
+/// Process a loop nest depth first.
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Recurse depth-first through inner loops.
+ for (Loop *SubLoop : L.getSubLoops())
+ Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
+
+ Changed |= formLCSSA(L, DT, LI, SE);
+ return Changed;
+}
+
+/// Process all loops in the function, inner-most out.
+static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+ for (auto &L : *LI)
+ Changed |= formLCSSARecursively(*L, DT, LI, SE);
+ return Changed;
+}
+
+namespace {
+struct LCSSAWrapperPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSAWrapperPass() : FunctionPass(ID) {
+ initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+ void verifyAnalysis() const override {
+    // This check is very expensive. On loop-intensive compiles it may cause
+    // up to a 10x slowdown, so it is currently disabled by default.
+    // LPPassManager always performs a limited form of LCSSA verification.
+    // Similar reasoning was used for the LoopInfo verifier.
+ if (VerifyLoopLCSSA) {
+ assert(all_of(*LI,
+ [&](Loop *L) {
+ return L->isRecursivelyLCSSAForm(*DT, *LI);
+ }) &&
+ "LCSSA form is broken!");
+ }
+  }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG. It maintains both of these,
+ /// as well as the CFG. It also requires dominator information.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+
+ // This is needed to perform LCSSA verification inside LPPassManager
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
+ }
+};
+}
+
+char LCSSAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
+INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
+ false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
+char &llvm::LCSSAID = LCSSAWrapperPass::ID;
+
+/// Transform \p F into loop-closed SSA form.
+bool LCSSAWrapperPass::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ SE = SEWP ? &SEWP->getSE() : nullptr;
+
+ return formLCSSAOnAllLoops(LI, *DT, SE);
+}
+
+PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ if (!formLCSSAOnAllLoops(&LI, DT, SE))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ return PA;
+}
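+
+// Illustrative usage sketch (hypothetical driver code, not part of this
+// file): with the legacy pass manager, the analyses required above are
+// scheduled automatically:
+//
+//   llvm::legacy::FunctionPassManager FPM(&M);
+//   FPM.add(llvm::createLCSSAPass());
+//   FPM.run(F);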
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
new file mode 100644
index 000000000000..42aca757c2af
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -0,0 +1,565 @@
+//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass shrink-wraps a call to a function if the result is not used.
+// The call can set errno but is otherwise side-effect free. For example:
+//    sqrt(val);
+// is transformed to
+//    if (val < 0)
+//      sqrt(val);
+// Even if the result of a library call is not being used, the compiler cannot
+// safely delete the call because the function can set errno on error
+// conditions.
+// Note that in many functions the error condition depends solely on the
+// incoming parameter. In this optimization, we generate the condition that
+// can lead to errno being set and use it to shrink-wrap the call. Since the
+// chances of hitting the error condition are low, the runtime call is
+// effectively eliminated.
+//
+// These partially dead calls are usually results of C++ abstraction penalty
+// exposed by inlining.
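+//
+// In IR terms, the rewrite sketched above looks roughly like the following
+// (illustrative; the block names match those set in shrinkWrapCI below):
+//   %cmp = fcmp olt double %val, 0.000000e+00
+//   br i1 %cmp, label %cdce.call, label %cdce.end, !prof !0
+// cdce.call:
+//   %call = call double @sqrt(double %val)
+//   br label %cdce.end
+// cdce.end:
+//   ...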
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "libcalls-shrinkwrap"
+
+STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
+STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
+
+namespace {
+class LibCallsShrinkWrapLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) {
+ initializeLibCallsShrinkWrapLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+char LibCallsShrinkWrapLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false, false)
+
+namespace {
+class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
+public:
+ LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
+      : TLI(TLI), DT(DT) {}
+ void visitCallInst(CallInst &CI) { checkCandidate(CI); }
+ bool perform() {
+ bool Changed = false;
+ for (auto &CI : WorkList) {
+ DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
+ << "\n");
+ if (perform(CI)) {
+ Changed = true;
+ DEBUG(dbgs() << "Transformed\n");
+ }
+ }
+ return Changed;
+ }
+
+private:
+ bool perform(CallInst *CI);
+ void checkCandidate(CallInst &CI);
+ void shrinkWrapCI(CallInst *CI, Value *Cond);
+ bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func);
+ bool performCallErrors(CallInst *CI, const LibFunc &Func);
+ bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func);
+ Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
+
+ // Create an OR of two conditions.
+ Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
+ CmpInst::Predicate Cmp2, float Val2) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2);
+ auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
+ return BBBuilder.CreateOr(Cond1, Cond2);
+ }
+
+ // Create a single condition using IRBuilder.
+ Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp,
+ float Val) {
+ Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
+ if (!Arg->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Arg->getType());
+ return BBBuilder.CreateFCmp(Cmp, Arg, V);
+ }
+
+ // Create a single condition.
+ Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ return createCond(BBBuilder, Arg, Cmp, Val);
+ }
+
+ const TargetLibraryInfo &TLI;
+ DominatorTree *DT;
+ SmallVector<CallInst *, 16> WorkList;
+};
+} // end anonymous namespace
+
+// Perform the transformation to calls with errno set by domain error.
+bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_acos: // DomainError: (x < -1 || x > 1)
+ case LibFunc_acosf: // Same as acos
+ case LibFunc_acosl: // Same as acos
+ case LibFunc_asin: // DomainError: (x < -1 || x > 1)
+ case LibFunc_asinf: // Same as asin
+ case LibFunc_asinl: // Same as asin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
+ break;
+ }
+ case LibFunc_cos: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_cosf: // Same as cos
+ case LibFunc_cosl: // Same as cos
+ case LibFunc_sin: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_sinf: // Same as sin
+ case LibFunc_sinl: // Same as sin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
+ -INFINITY);
+ break;
+ }
+ case LibFunc_acosh: // DomainError: (x < 1)
+ case LibFunc_acoshf: // Same as acosh
+ case LibFunc_acoshl: // Same as acosh
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
+ break;
+ }
+ case LibFunc_sqrt: // DomainError: (x < 0)
+ case LibFunc_sqrtf: // Same as sqrt
+ case LibFunc_sqrtl: // Same as sqrt
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by range error.
+bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl: {
+ Cond = generateTwoRangeCond(CI, Func);
+ break;
+ }
+ case LibFunc_expm1: // RangeError: (709, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
+ {
+ Cond = generateOneRangeCond(CI, Func);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by combination of errors.
+bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
+ const LibFunc &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc_atanh: // DomainError: (x < -1 || x > 1)
+ // PoleError: (x == -1 || x == 1)
+ // Overall Cond: (x <= -1 || x >= 1)
+ case LibFunc_atanhf: // Same as atanh
+ case LibFunc_atanhl: // Same as atanh
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
+ break;
+ }
+ case LibFunc_log: // DomainError: (x < 0)
+ // PoleError: (x == 0)
+ // Overall Cond: (x <= 0)
+ case LibFunc_logf: // Same as log
+ case LibFunc_logl: // Same as log
+ case LibFunc_log10: // Same as log
+ case LibFunc_log10f: // Same as log
+ case LibFunc_log10l: // Same as log
+ case LibFunc_log2: // Same as log
+ case LibFunc_log2f: // Same as log
+ case LibFunc_log2l: // Same as log
+ case LibFunc_logb: // Same as log
+ case LibFunc_logbf: // Same as log
+ case LibFunc_logbl: // Same as log
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
+ break;
+ }
+ case LibFunc_log1p: // DomainError: (x < -1)
+ // PoleError: (x == -1)
+ // Overall Cond: (x <= -1)
+ case LibFunc_log1pf: // Same as log1p
+ case LibFunc_log1pl: // Same as log1p
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
+ break;
+ }
+ case LibFunc_pow: // DomainError: x < 0 and y is noninteger
+ // PoleError: x == 0 and y < 0
+ // RangeError: overflow or underflow
+ case LibFunc_powf:
+ case LibFunc_powl: {
+ Cond = generateCondForPow(CI, Func);
+ if (Cond == nullptr)
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ assert(Cond && "performCallErrors should not see an empty condition");
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Check if CI is a candidate for shrink-wrapping and, if it is, put it into
+// the work list.
+void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
+ if (CI.isNoBuiltin())
+ return;
+ // A possible improvement is to handle the calls with the return value being
+ // used. If there is API for fast libcall implementation without setting
+ // errno, we can use the same framework to direct/wrap the call to the fast
+ // API in the error free path, and leave the original call in the slow path.
+ if (!CI.use_empty())
+ return;
+
+ LibFunc Func;
+ Function *Callee = CI.getCalledFunction();
+ if (!Callee)
+ return;
+ if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
+ return;
+
+ if (CI.getNumArgOperands() == 0)
+ return;
+ // TODO: Handle long double in other formats.
+ Type *ArgType = CI.getArgOperand(0)->getType();
+ if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() ||
+ ArgType->isX86_FP80Ty()))
+ return;
+
+ WorkList.push_back(&CI);
+}
+
+// Generate the upper bound condition for RangeError.
+Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
+ const LibFunc &Func) {
+ float UpperBound;
+ switch (Func) {
+ case LibFunc_expm1: // RangeError: (709, inf)
+ UpperBound = 709.0f;
+ break;
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ UpperBound = 88.0f;
+ break;
+ case LibFunc_expm1l: // RangeError: (11356, inf)
+ UpperBound = 11356.0f;
+ break;
+ default:
+ llvm_unreachable("Unhandled library call!");
+ }
+
+ ++NumWrappedOneCond;
+ return createCond(CI, CmpInst::FCMP_OGT, UpperBound);
+}
+
+// Generate the lower and upper bound condition for RangeError.
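+// The constants below appear to approximate the log-domain limits of the
+// corresponding floating-point types; e.g. for double exp(), ln(DBL_MAX) is
+// about 709.78 and the natural log of the smallest denormal is about -744.4,
+// hence the [-745, 709] window outside of which errno may be set.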
+Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
+ const LibFunc &Func) {
+ float UpperBound, LowerBound;
+ switch (Func) {
+ case LibFunc_cosh: // RangeError: (x < -710 || x > 710)
+ case LibFunc_sinh: // Same as cosh
+ LowerBound = -710.0f;
+ UpperBound = 710.0f;
+ break;
+ case LibFunc_coshf: // RangeError: (x < -89 || x > 89)
+ case LibFunc_sinhf: // Same as coshf
+ LowerBound = -89.0f;
+ UpperBound = 89.0f;
+ break;
+ case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357)
+ case LibFunc_sinhl: // Same as coshl
+ LowerBound = -11357.0f;
+ UpperBound = 11357.0f;
+ break;
+ case LibFunc_exp: // RangeError: (x < -745 || x > 709)
+ LowerBound = -745.0f;
+ UpperBound = 709.0f;
+ break;
+ case LibFunc_expf: // RangeError: (x < -103 || x > 88)
+ LowerBound = -103.0f;
+ UpperBound = 88.0f;
+ break;
+ case LibFunc_expl: // RangeError: (x < -11399 || x > 11356)
+ LowerBound = -11399.0f;
+ UpperBound = 11356.0f;
+ break;
+ case LibFunc_exp10: // RangeError: (x < -323 || x > 308)
+ LowerBound = -323.0f;
+ UpperBound = 308.0f;
+ break;
+ case LibFunc_exp10f: // RangeError: (x < -45 || x > 38)
+ LowerBound = -45.0f;
+ UpperBound = 38.0f;
+ break;
+ case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932)
+ LowerBound = -4950.0f;
+ UpperBound = 4932.0f;
+ break;
+ case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023)
+ LowerBound = -1074.0f;
+ UpperBound = 1023.0f;
+ break;
+ case LibFunc_exp2f: // RangeError: (x < -149 || x > 127)
+ LowerBound = -149.0f;
+ UpperBound = 127.0f;
+ break;
+ case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383)
+ LowerBound = -16445.0f;
+ UpperBound = 11383.0f;
+ break;
+ default:
+ llvm_unreachable("Unhandled library call!");
+ }
+
+ ++NumWrappedTwoCond;
+ return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT,
+ LowerBound);
+}
+
+// For pow(x,y), we only handle the following cases:
+// (1) x is a constant && (x >= 1) && (x <= MaxUInt8)
+//     Cond is: (y > 127)
+// (2) x is a value coming from an integer type.
+//    (2.1) if x's bit_size == 8
+//          Cond: (x <= 0 || y > 128)
+//    (2.2) if x's bit_size is 16
+//          Cond: (x <= 0 || y > 64)
+//    (2.3) if x's bit_size is 32
+//          Cond: (x <= 0 || y > 32)
+//    Support for powl(x,y) and powf(x,y) is TBD.
+//
+// Note that the condition can be more conservative than the actual one
+// (i.e. we might invoke calls that will not set errno).
+//
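+// For intuition (added note): a base coming from a uint8 value is at most
+// 255, and 255^128 is roughly 1.1e308, just below DBL_MAX, so y > 127 (resp.
+// 128/64/32 for 8/16/32-bit integer bases) conservatively brackets the
+// overflow region; a non-positive base additionally risks a domain or pole
+// error.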
+Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
+ const LibFunc &Func) {
+ // FIXME: LibFunc_powf and powl TBD.
+ if (Func != LibFunc_pow) {
+ DEBUG(dbgs() << "Not handled powf() and powl()\n");
+ return nullptr;
+ }
+
+ Value *Base = CI->getArgOperand(0);
+ Value *Exp = CI->getArgOperand(1);
+ IRBuilder<> BBBuilder(CI);
+
+ // Constant Base case.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
+ double D = CF->getValueAPF().convertToDouble();
+ if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) {
+ DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
+ return nullptr;
+ }
+
+ ++NumWrappedOneCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ }
+
+  // Handle the case where the base value comes from an integer type.
+ Instruction *I = dyn_cast<Instruction>(Base);
+ if (!I) {
+ DEBUG(dbgs() << "Not handled pow(): FP type base\n");
+ return nullptr;
+ }
+ unsigned Opcode = I->getOpcode();
+ if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) {
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ float UpperV = 0.0f;
+ if (BW == 8)
+ UpperV = 128.0f;
+ else if (BW == 16)
+ UpperV = 64.0f;
+ else if (BW == 32)
+ UpperV = 32.0f;
+ else {
+ DEBUG(dbgs() << "Not handled pow(): type too wide\n");
+ return nullptr;
+ }
+
+ ++NumWrappedTwoCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV));
+ Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ if (!Base->getType()->isFloatTy())
+      V0 = ConstantExpr::getFPExtend(V0, Base->getType());
+
+ Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
+ return BBBuilder.CreateOr(Cond0, Cond);
+ }
+ DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
+ return nullptr;
+}
+
+// Wrap conditions that can potentially generate errno to the library call.
+void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
+  assert(Cond != nullptr && "shrinkWrapCI is not expecting a null condition");
+ MDNode *BranchWeights =
+ MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
+
+ TerminatorInst *NewInst =
+ SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
+ BasicBlock *CallBB = NewInst->getParent();
+ CallBB->setName("cdce.call");
+ BasicBlock *SuccBB = CallBB->getSingleSuccessor();
+ assert(SuccBB && "The split block should have a single successor");
+ SuccBB->setName("cdce.end");
+ CI->removeFromParent();
+ CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
+ DEBUG(dbgs() << "== Basic Block After ==");
+ DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
+ << *CallBB->getSingleSuccessor() << "\n");
+}
+
+// Perform the transformation to a single candidate.
+bool LibCallsShrinkWrap::perform(CallInst *CI) {
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+  assert(Callee && "perform() should apply to a non-empty callee");
+  bool Known = TLI.getLibFunc(*Callee, Func);
+  (void)Known;
+  assert(Known && "perform() is not expecting an unknown library function");
+
+  if (performCallDomainErrorOnly(CI, Func) ||
+      performCallRangeErrorOnly(CI, Func))
+ return true;
+ return performCallErrors(CI, Func);
+}
+
+void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+ DominatorTree *DT) {
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ LibCallsShrinkWrap CCDCE(TLI, DT);
+ CCDCE.visit(F);
+ bool Changed = CCDCE.perform();
+
+// Verify the dominator tree after we've updated it locally.
+#ifndef NDEBUG
+ if (DT)
+ DT->verifyDomTree();
+#endif
+ return Changed;
+}
+
+bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ return runImpl(F, TLI, DT);
+}
+
+namespace llvm {
+char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID;
+
+// Public interface to LibCallsShrinkWrap pass.
+FunctionPass *createLibCallsShrinkWrapPass() {
+ return new LibCallsShrinkWrapLegacyPass();
+}
+
+PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ if (!runImpl(F, TLI, DT))
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..74610613001c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,2210 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "local"
+
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+
+//===----------------------------------------------------------------------===//
+// Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
+/// conditions and indirectbr addresses this might make dead if
+/// DeleteDeadConditions is true.
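+///
+/// For example (illustrative): a block ending in "br i1 true, label %A,
+/// label %B" is rewritten to end in "br label %A", after removing the block
+/// from %B's PHI nodes.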
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI) {
+ TerminatorInst *T = BB->getTerminator();
+ IRBuilder<> Builder(T);
+
+  // Branch - See if we are conditionally jumping on a constant.
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (BI->isUnconditional()) return false; // Can't optimize uncond branch
+ BasicBlock *Dest1 = BI->getSuccessor(0);
+ BasicBlock *Dest2 = BI->getSuccessor(1);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+      // Are we branching on a constant?
+      // YES. Change to an unconditional branch...
+ BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+ BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+
+      // Let the basic block know that we are letting go of it. Based on this,
+      // it will adjust its PHI nodes.
+ OldDest->removePredecessor(BB);
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Destination);
+ BI->eraseFromParent();
+ return true;
+ }
+
+ if (Dest2 == Dest1) { // Conditional branch to same location?
+ // This branch matches something like this:
+ // br bool %cond, label %Dest, label %Dest
+ // and changes it into: br label %Dest
+
+ // Let the basic block know that we are letting go of one copy of it.
+ assert(BI->getParent() && "Terminator not inserted in block!");
+ Dest1->removePredecessor(BI->getParent());
+
+ // Replace the conditional branch with an unconditional one.
+ Builder.CreateBr(Dest1);
+ Value *Cond = BI->getCondition();
+ BI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+ return false;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // If we are switching on a constant, we can convert the switch to an
+ // unconditional branch.
+ ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ BasicBlock *TheOnlyDest = DefaultDest;
+
+ // If the default is unreachable, ignore it when searching for TheOnlyDest.
+ if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
+ SI->getNumCases() > 0) {
+ TheOnlyDest = SI->case_begin()->getCaseSuccessor();
+ }
+
+ // Figure out which case it goes to.
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ // Found case matching a constant operand?
+ if (i->getCaseValue() == CI) {
+ TheOnlyDest = i->getCaseSuccessor();
+ break;
+ }
+
+ // Check to see if this branch is going to the same place as the default
+ // dest. If so, eliminate it as an explicit compare.
+ if (i->getCaseSuccessor() == DefaultDest) {
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
+ unsigned NCases = SI->getNumCases();
+ // Fold the case metadata into the default if there will be any branches
+ // left, unless the metadata doesn't match the switch.
+ if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ // Merge weight of this case to the default weight.
+ unsigned idx = i->getCaseIndex();
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
+ // Remove this entry.
+ DefaultDest->removePredecessor(SI->getParent());
+ i = SI->removeCase(i);
+ e = SI->case_end();
+ continue;
+ }
+
+ // Otherwise, check to see if the switch only branches to one destination.
+      // We do this by resetting "TheOnlyDest" to null when we find two non-equal
+ // destinations.
+ if (i->getCaseSuccessor() != TheOnlyDest)
+ TheOnlyDest = nullptr;
+
+ // Increment this iterator as we haven't removed the case.
+ ++i;
+ }
+
+ if (CI && !TheOnlyDest) {
+      // We are branching on a constant that matches none of the cases; go to
+      // the default successor.
+ TheOnlyDest = SI->getDefaultDest();
+ }
+
+ // If we found a single destination that we can fold the switch into, do so
+ // now.
+ if (TheOnlyDest) {
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+ BasicBlock *BB = SI->getParent();
+
+ // Remove entries from PHI nodes which we no longer branch to...
+ for (BasicBlock *Succ : SI->successors()) {
+        // Is this the one destination we still branch to?
+ if (Succ == TheOnlyDest)
+ TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
+ else
+ Succ->removePredecessor(BB);
+ }
+
+ // Delete the old switch.
+ Value *Cond = SI->getCondition();
+ SI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
+ return true;
+ }
+
+ if (SI->getNumCases() == 1) {
+ // Otherwise, we can fold this switch into a conditional branch
+ // instruction if it has only one non-default destination.
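+      // For example (illustrative IR):
+      //   switch i32 %x, label %default [ i32 0, label %zero ]
+      // becomes:
+      //   %cond = icmp eq i32 %x, 0
+      //   br i1 %cond, label %zero, label %default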
+ auto FirstCase = *SI->case_begin();
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
+
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
+ }
+
+ // Update make.implicit metadata to the newly-created conditional branch.
+ MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
+ if (MakeImplicitMD)
+ NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+
+ if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ // indirectbr blockaddress(@F, @BB) -> br label @BB
+ if (BlockAddress *BA =
+ dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
+ BasicBlock *TheOnlyDest = BA->getBasicBlock();
+ // Insert the new branch.
+ Builder.CreateBr(TheOnlyDest);
+
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ if (IBI->getDestination(i) == TheOnlyDest)
+ TheOnlyDest = nullptr;
+ else
+ IBI->getDestination(i)->removePredecessor(IBI->getParent());
+ }
+ Value *Address = IBI->getAddress();
+ IBI->eraseFromParent();
+ if (DeleteDeadConditions)
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
+
+ // If we didn't find our destination in the IBI successor list, then we
+ // have undefined behavior. Replace the unconditional branch with an
+ // 'unreachable' instruction.
+ if (TheOnlyDest) {
+ BB->getTerminator()->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ }
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
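+/// For example, an 'add' whose result has no uses is trivially dead, while a
+/// volatile store never is.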
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (!I->use_empty())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
+bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (isa<TerminatorInst>(I))
+ return false;
+
+ // We don't want the landingpad-like instructions removed by anything this
+ // general.
+ if (I->isEHPad())
+ return false;
+
+ // We don't want debug info removed by anything this general, unless
+ // debug info is empty.
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+ if (DDI->getAddress())
+ return false;
+ return true;
+ }
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+ if (DVI->getValue())
+ return false;
+ return true;
+ }
+
+ if (!I->mayHaveSideEffects())
+ return true;
+
+ // Special case intrinsics that "may have side effects" but can be deleted
+ // when dead.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Safe to delete llvm.stacksave if dead.
+ if (II->getIntrinsicID() == Intrinsic::stacksave)
+ return true;
+
+    // Lifetime intrinsics are dead when their right-hand operand is undef.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ return isa<UndefValue>(II->getArgOperand(1));
+
+ // Assumptions are dead if their condition is trivially true. Guards on
+ // true are operationally no-ops. In the future we can consider more
+ // sophisticated tradeoffs for guards considering potential for check
+ // widening, but for now we keep things simple.
+ if (II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ return !Cond->isZero();
+
+ return false;
+ }
+ }
+
+ if (isAllocLikeFn(I, TLI))
+ return true;
+
+ if (CallInst *CI = isFreeCall(I, TLI))
+ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
+ return C->isNullValue() || isa<UndefValue>(C);
+
+ if (CallSite CS = CallSite(I))
+ if (isMathLibCallNoop(CS, TLI))
+ return true;
+
+ return false;
+}
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
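+///
+/// For example, given the chain (illustrative IR)
+///   %a = add i32 %x, 1
+///   %b = mul i32 %a, 2   ; %b has no uses
+/// passing %b deletes %b, which leaves %a dead, so %a is deleted as well.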
+bool
+llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+ const TargetLibraryInfo *TLI) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
+ return false;
+
+ SmallVector<Instruction*, 16> DeadInsts;
+ DeadInsts.push_back(I);
+
+ do {
+ I = DeadInsts.pop_back_val();
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty()) continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ DeadInsts.push_back(OpI);
+ }
+
+ I->eraseFromParent();
+ } while (!DeadInsts.empty());
+
+ return true;
+}
+
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are no uses or multiple uses that all refer to the same
+/// value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::user_iterator UI = I->user_begin();
+ Value::user_iterator UE = I->user_end();
+ if (UI == UE)
+ return true;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
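+///
+/// For example (illustrative IR), the single-use cycle
+///   %p = phi i32 [ %q, %bb1 ]
+///   %q = phi i32 [ %p, %bb2 ]
+/// is effectively dead when neither phi has any other use.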
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+ const TargetLibraryInfo *TLI) {
+ SmallPtrSet<Instruction*, 4> Visited;
+ for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
+ I = cast<Instruction>(*I->user_begin())) {
+ if (I->use_empty())
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+
+ // If we find an instruction more than once, we're on a cycle that
+ // won't prove fruitful.
+ if (!Visited.insert(I).second) {
+ // Break the cycle and delete the instruction and its operands.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool
+simplifyAndDCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+
+ return true;
+ }
+
+ if (Value *SimpleV = SimplifyInstruction(I, DL)) {
+ // Add the users to the worklist. CAREFUL: an instruction can use itself,
+ // in the case of a phi node.
+ for (User *U : I->users()) {
+ if (U != I) {
+ WorkList.insert(cast<Instruction>(U));
+ }
+ }
+
+ // Replace the instruction with its simplified value.
+ bool Changed = false;
+ if (!I->use_empty()) {
+ I->replaceAllUsesWith(SimpleV);
+ Changed = true;
+ }
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+ }
+ return false;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
+ const TargetLibraryInfo *TLI) {
+ bool MadeChange = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
+#ifndef NDEBUG
+ // In debug builds, ensure that the terminator of the block is never replaced
+ // or deleted by these simplifications. The idea of simplification is that it
+ // cannot introduce new instructions, and there is no way to replace the
+ // terminator of a block without introducing a new instruction.
+ AssertingVH<Instruction> TerminatorVH(&BB->back());
+#endif
+
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+ BI != E;) {
+ assert(!BI->isTerminator());
+ Instruction *I = &*BI;
+ ++BI;
+
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Graph Restructuring.
+//
+
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakTrackingVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+ Value *OldPhiIt = PhiIt;
+
+ if (!recursivelySimplifyInstruction(PN))
+ continue;
+
+ // If recursive simplification ended up deleting the next PHI node we would
+    // iterate to, then our iterator is invalid; restart scanning from the top
+ // of the block.
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
+ }
+}
+
+
+/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into DestBB and
+/// deleting the predecessor block.
+///
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
+ // If BB has single-entry PHI nodes, fold them.
+ while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ Value *NewVal = PN->getIncomingValue(0);
+    // Replace a self-referencing PHI with undef; it must be dead.
+ if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ PN->replaceAllUsesWith(NewVal);
+ PN->eraseFromParent();
+ }
+
+ BasicBlock *PredBB = DestBB->getSinglePredecessor();
+ assert(PredBB && "Block doesn't have a single predecessor!");
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+
+ // Anything that branched to PredBB now branches to DestBB.
+ PredBB->replaceAllUsesWith(DestBB);
+
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ // If the PredBB is the entry block of the function, move DestBB up to
+ // become the entry block after we erase PredBB.
+ if (PredBB == &DestBB->getParent()->getEntryBlock())
+ DestBB->moveAfter(PredBB);
+
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
+ // Nuke BB.
+ PredBB->eraseFromParent();
+}
+
+/// CanMergeValues - Return true if we can choose one of these values to use
+/// in place of the other. Note that we will always choose the non-undef
+/// value to keep.
+static bool CanMergeValues(Value *First, Value *Second) {
+ return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
+}
+
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
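+/// For example (illustrative IR), the fold is rejected when some block P is a
+/// predecessor of both BB and Succ and a phi in Succ would see conflicting
+/// values from P:
+///   %v = phi i32 [ 1, %BB ], [ 2, %P ]
+/// After folding, P would have to supply both 1 and 2 to %v.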
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+  // Shortcut: if there is only a single predecessor, it must be BB and
+  // merging is always safe.
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
+ PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ !CanMergeValues(Val, PN->getIncomingValue(PI))) {
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << IBB->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+typedef SmallVector<BasicBlock *, 16> PredBlockVector;
+typedef DenseMap<BasicBlock *, Value *> IncomingValueMap;
+
+/// \brief Determines the value to use as the phi node input for a block.
+///
+/// Select between \p OldVal and any value that we know flows from \p BB
+/// to a particular phi on the basis of which one (if either) is not
+/// undef. Update IncomingValues based on the selected value.
+///
+/// \param OldVal The value we are considering selecting.
+/// \param BB The block that the value flows in from.
+/// \param IncomingValues A map from block-to-value for other phi inputs
+/// that we have examined.
+///
+/// \returns the selected value.
+static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
+ IncomingValueMap &IncomingValues) {
+ if (!isa<UndefValue>(OldVal)) {
+ assert((!IncomingValues.count(BB) ||
+ IncomingValues.find(BB)->second == OldVal) &&
+ "Expected OldVal to match incoming value from BB!");
+
+ IncomingValues.insert(std::make_pair(BB, OldVal));
+ return OldVal;
+ }
+
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It != IncomingValues.end()) return It->second;
+
+ return OldVal;
+}
+
+/// \brief Create a map from block to value for the operands of a
+/// given phi.
+///
+/// Create a map from block to value for each non-undef value flowing
+/// into \p PN.
+///
+/// \param PN The phi we are collecting the map for.
+/// \param IncomingValues [out] The map from block to value for this phi.
+static void gatherIncomingValuesToPhi(PHINode *PN,
+ IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V))
+ IncomingValues.insert(std::make_pair(BB, V));
+ }
+}
+
+/// \brief Replace the incoming undef values to a phi with the values
+/// from a block-to-value map.
+///
+/// \param PN The phi we are replacing the undefs in.
+/// \param IncomingValues A map from block to value.
+static void replaceUndefValuesInPhi(PHINode *PN,
+ const IncomingValueMap &IncomingValues) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+
+ if (!isa<UndefValue>(V)) continue;
+
+ BasicBlock *BB = PN->getIncomingBlock(i);
+ IncomingValueMap::const_iterator It = IncomingValues.find(BB);
+ if (It == IncomingValues.end()) continue;
+
+ PN->setIncomingValue(i, It->second);
+ }
+}
+
+/// \brief Replace a value flowing from a block to a phi with
+/// potentially multiple instances of that value flowing from the
+/// block's predecessors to the phi.
+///
+/// \param BB The block with the value flowing into the phi.
+/// \param BBPreds The predecessors of BB.
+/// \param PN The phi that we are updating.
+static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
+ const PredBlockVector &BBPreds,
+ PHINode *PN) {
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ IncomingValueMap IncomingValues;
+
+ // We are merging two blocks - BB, and the block containing PN - and
+ // as a result we need to redirect edges from the predecessors of BB
+ // to go to the block containing PN, and update PN
+ // accordingly. Since we allow merging blocks in the case where the
+ // predecessor and successor blocks both share some predecessors,
+ // and where some of those common predecessors might have undef
+ // values flowing into PN, we want to rewrite those values to be
+ // consistent with the non-undef values.
+
+ gatherIncomingValuesToPhi(PN, IncomingValues);
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
+ Value *PredVal = OldValPN->getIncomingValue(i);
+ Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ } else {
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
+ // Update existing incoming values in PN for this
+ // predecessor of BB.
+ BasicBlock *PredBB = BBPreds[i];
+ Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
+ IncomingValues);
+
+ // And add a new incoming value for this predecessor for the
+ // newly retargeted branch.
+ PN->addIncoming(Selected, PredBB);
+ }
+ }
+
+ replaceUndefValuesInPhi(PN, IncomingValues);
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potentially side-effect-free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
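+///
+/// For example (illustrative CFG), when BB contains only 'br label %Succ',
+/// every predecessor of BB is rewritten to branch directly to Succ and BB is
+/// erased.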
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Use &U : BBI->uses()) {
+ if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) {
+ if (PN->getIncomingBlock(U) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
+ }
+ }
+
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(),
+ BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // If the unconditional branch we replaced contains llvm.loop metadata, we
+ // add the metadata to the branch instructions in the predecessors.
+ unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop");
+ Instruction *TI = BB->getTerminator();
+ if (TI)
+ if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
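+/// For example (illustrative IR), given the duplicates
+///   %p1 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+///   %p2 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+/// all uses of %p2 are replaced with %p1 and %p2 is erased.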
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ struct PHIDenseMapInfo {
+ static PHINode *getEmptyKey() {
+ return DenseMapInfo<PHINode *>::getEmptyKey();
+ }
+ static PHINode *getTombstoneKey() {
+ return DenseMapInfo<PHINode *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(PHINode *PN) {
+ // Compute a hash value on the operands. Instcombine will likely have
+ // sorted them, which helps expose duplicates, but we have to check all
+ // the operands to be safe in case instcombine hasn't run.
+ return static_cast<unsigned>(hash_combine(
+ hash_combine_range(PN->value_op_begin(), PN->value_op_end()),
+ hash_combine_range(PN->block_begin(), PN->block_end())));
+ }
+ static bool isEqual(PHINode *LHS, PHINode *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
+
+ // Set of unique PHINodes.
+ DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
+
+ // Examine each PHI.
+ bool Changed = false;
+ for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) {
+ auto Inserted = PHISet.insert(PN);
+ if (!Inserted.second) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(*Inserted.first);
+ PN->eraseFromParent();
+ Changed = true;
+
+ // The RAUW can change PHIs that we already visited. Start over from the
+ // beginning.
+ PHISet.clear();
+ I = BB->begin();
+ }
+ }
+
+ return Changed;
+}
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign,
+ const DataLayout &DL) {
+ assert(PrefAlign > Align);
+
+ V = V->stripPointerCasts();
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // TODO: ideally, computeKnownBits ought to have used
+ // AllocaInst::getAlignment() in its computation already, making
+ // the below max redundant. But, as it turns out,
+ // stripPointerCasts recurses through infinite layers of bitcasts,
+ // while computeKnownBits is not allowed to traverse more than 6
+ // levels.
+ Align = std::max(AI->getAlignment(), Align);
+ if (PrefAlign <= Align)
+ return Align;
+
+ // If the preferred alignment is greater than the natural stack alignment
+ // then don't round up. This avoids dynamic stack realignment.
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
+ return Align;
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ // TODO: as above, this shouldn't be necessary.
+ Align = std::max(GO->getAlignment(), Align);
+ if (PrefAlign <= Align)
+ return Align;
+
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global. If the memory we set aside for the global may not be the
+ // memory used by the final program then it is impossible for us to reliably
+ // enforce the preferred alignment.
+ if (!GO->canIncreaseAlignment())
+ return Align;
+
+ GO->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+
+ return Align;
+}
+
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const DataLayout &DL,
+ const Instruction *CxtI,
+ AssumptionCache *AC,
+ const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+
+ KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT);
+ unsigned TrailZ = Known.countMinTrailingZeros();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
+
+//===----------------------------------------------------------------------===//
+// Dbg Intrinsic utilities
+//
+
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
+ Instruction *I) {
+  // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ llvm::BasicBlock::InstListType::iterator PrevI(I);
+ if (PrevI != I->getParent()->getInstList().begin()) {
+ --PrevI;
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+ if (DVI->getValue() == I->getOperand(0) &&
+ DVI->getOffset() == 0 &&
+ DVI->getVariable() == DIVar &&
+ DVI->getExpression() == DIExpr)
+ return true;
+ }
+ return false;
+}
+
+/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ PHINode *APN) {
+  // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ findDbgValues(DbgValues, APN);
+ for (auto *DVI : DbgValues) {
+ assert(DVI->getValue() == APN);
+ assert(DVI->getOffset() == 0);
+ if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
+ return true;
+ }
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
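+///
+/// For example (illustrative IR), given
+///   call void @llvm.dbg.declare(metadata i32* %x.addr, ...)
+///   store i32 %v, i32* %x.addr
+/// a call to @llvm.dbg.value describing %v is inserted just before the store.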
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ StoreInst *SI, DIBuilder &Builder) {
+ auto *DIVar = DDI->getVariable();
+ assert(DIVar && "Missing variable");
+ auto *DIExpr = DDI->getExpression();
+ Value *DV = SI->getOperand(0);
+
+  // If an argument is zero extended then use the argument directly. The ZExt
+  // may be zapped by an optimization pass in the future.
+ Argument *ExtendedArg = nullptr;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg) {
+ // If this DDI was already describing only a fragment of a variable, ensure
+ // that fragment is appropriately narrowed here.
+ // But if a fragment wasn't used, describe the value as the original
+ // argument (rather than the zext or sext) so that it remains described even
+ // if the sext/zext is optimized away. This widens the variable description,
+ // leaving it up to the consumer to know how the smaller value may be
+ // represented in a larger register.
+ if (auto Fragment = DIExpr->getFragmentInfo()) {
+ unsigned FragmentOffset = Fragment->OffsetInBits;
+ SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
+ DIExpr->elements_end() - 3);
+ Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+ Ops.push_back(FragmentOffset);
+ const DataLayout &DL = DDI->getModule()->getDataLayout();
+ Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
+ DIExpr = Builder.createExpression(Ops);
+ }
+ DV = ExtendedArg;
+ }
+ if (!LdStHasDebugValue(DIVar, DIExpr, SI))
+ Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ SI);
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ LoadInst *LI, DIBuilder &Builder) {
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (LdStHasDebugValue(DIVar, DIExpr, LI))
+ return;
+
+ // We are now tracking the loaded value instead of the address. In the
+ // future if multi-location support is added to the IR, it might be
+ // preferable to keep tracking both the loaded value and the original
+ // address in case the alloca can not be elided.
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
+ LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
+ DbgValue->insertAfter(LI);
+}
+
+/// Inserts a llvm.dbg.value intrinsic after a phi
+/// that has an associated llvm.dbg.declare intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ PHINode *APN, DIBuilder &Builder) {
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (PhiHasDebugValue(DIVar, DIExpr, APN))
+ return;
+
+ BasicBlock *BB = APN->getParent();
+ auto InsertionPt = BB->getFirstInsertionPt();
+
+ // The block may be a catchswitch block, which does not have a valid
+ // insertion point.
+ // FIXME: Insert dbg.value markers in the successors when appropriate.
+ if (InsertionPt != BB->end())
+ Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ &*InsertionPt);
+}
+
+/// Determine whether this alloca is either a VLA or an array.
+static bool isArray(AllocaInst *AI) {
+ return AI->isArrayAllocation() ||
+ AI->getType()->getElementType()->isArrayTy();
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
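+///
+/// For example (illustrative behavior), a dbg.declare of a scalar alloca is
+/// rewritten into a dbg.value at each load of and store to that alloca, so
+/// the variable stays described even after the stack slot is elided.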
+bool llvm::LowerDbgDeclare(Function &F) {
+ DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
+ SmallVector<DbgDeclareInst *, 4> Dbgs;
+ for (auto &FI : F)
+ for (Instruction &BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
+ Dbgs.push_back(DDI);
+
+ if (Dbgs.empty())
+ return false;
+
+ for (auto &I : Dbgs) {
+ DbgDeclareInst *DDI = I;
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ // The dbg.values allow tracking a variable even if it is not
+ // stored on the stack, while the dbg.declare can only describe
+ // the stack slot (and at a lexical-scope granularity). Later
+ // passes will attempt to elide the stack slot.
+ if (AI && !isArray(AI)) {
+ for (auto &AIUse : AI->uses()) {
+ User *U = AIUse.getUser();
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AIUse.getOperandNo() == 1)
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a call by-value or some other instruction that
+ // takes a pointer to the variable. Insert a *value*
+ // intrinsic that describes the alloca.
+ DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
+ DDI->getExpression(), DDI->getDebugLoc(),
+ CI);
+ }
+ }
+ DDI->eraseFromParent();
+ }
+ }
+ return true;
+}
+
+/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
+/// alloca 'V', if any.
+DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
+ if (auto *L = LocalAsMetadata::getIfExists(V))
+ if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ return DDI;
+
+ return nullptr;
+}
+
+void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
+ if (auto *L = LocalAsMetadata::getIfExists(V))
+ if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
+ DbgValues.push_back(DVI);
+}
+
+
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+ Instruction *InsertBefore, DIBuilder &Builder,
+ bool Deref, int Offset) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address);
+ if (!DDI)
+ return false;
+ DebugLoc Loc = DDI->getDebugLoc();
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+ DIExpr = DIExpression::prepend(DIExpr, Deref, Offset);
+  // Insert the new llvm.dbg.declare immediately before InsertBefore, and
+  // remove the old llvm.dbg.declare.
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
+ DDI->eraseFromParent();
+ return true;
+}
+
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, bool Deref, int Offset) {
+ return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
+ Deref, Offset);
+}
+
+static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
+ DIBuilder &Builder, int Offset) {
+ DebugLoc Loc = DVI->getDebugLoc();
+ auto *DIVar = DVI->getVariable();
+ auto *DIExpr = DVI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ // This is an alloca-based llvm.dbg.value. The first thing it should do with
+ // the alloca pointer is dereference it. Otherwise we don't know how to handle
+ // it and give up.
+ if (!DIExpr || DIExpr->getNumElements() < 1 ||
+ DIExpr->getElement(0) != dwarf::DW_OP_deref)
+ return;
+
+ // Insert the offset immediately after the first deref.
+ // We could just change the offset argument of dbg.value, but it's unsigned...
+ if (Offset) {
+ SmallVector<uint64_t, 4> Ops;
+ Ops.push_back(dwarf::DW_OP_deref);
+ DIExpression::appendOffset(Ops, Offset);
+ Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
+ DIExpr = Builder.createExpression(Ops);
+ }
+
+ Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
+ Loc, DVI);
+ DVI->eraseFromParent();
+}
+
+void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder, int Offset) {
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) {
+ Use &U = *UI++;
+ if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
+ replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
+ }
+}
+
+void llvm::salvageDebugInfo(Instruction &I) {
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ auto &M = *I.getModule();
+
+ auto MDWrap = [&](Value *V) {
+ return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
+ };
+
+ if (isa<BitCastInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
+ // to use the cast's source.
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ unsigned BitWidth =
+ M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
+ APInt Offset(BitWidth, 0);
+ // Rewrite a constant GEP into a DIExpression. Since we are performing
+ // arithmetic to compute the variable's *value* in the DIExpression, we
+ // need to mark the expression with a DW_OP_stack_value.
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ // GEP offsets are i32 and thus always fit into an int64_t.
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref,
+ Offset.getSExtValue(),
+ DIExpression::WithStackValue);
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+ } else if (isa<LoadInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Rewrite the load into DW_OP_deref.
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+}
+
+unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
+ unsigned NumDeadInst = 0;
+  // Delete the instructions backwards, as doing so tends to reduce the number
+  // of def-use and use-def chain updates required.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != &BB->front()) {
+ // Delete the next to last instruction.
+ Instruction *Inst = &*--EndInst->getIterator();
+ if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst))
+ ++NumDeadInst;
+ Inst->eraseFromParent();
+ }
+ return NumDeadInst;
+}
+
+unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
+ bool PreserveLCSSA) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (BasicBlock *Successor : successors(BB))
+ Successor->removePredecessor(BB, PreserveLCSSA);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ unsigned NumInstrsRemoved = 0;
+ BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ ++NumInstrsRemoved;
+ }
+ return NumInstrsRemoved;
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
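+/// For example (illustrative IR):
+///   invoke void @f() to label %normal unwind label %lpad
+/// becomes:
+///   call void @f()
+///   br label %normal
+/// and %lpad loses this block as a predecessor.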
+static void changeToCall(InvokeInst *II) {
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+}
+
+BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
+ BasicBlock *UnwindEdge) {
+ BasicBlock *BB = CI->getParent();
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
+ BasicBlock *Split =
+ BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // Create the new invoke instruction.
+ SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ // Note: we're round tripping operand bundles through memory here, and that
+ // can potentially be avoided with a cleverer API design that we do not have
+ // as of this time.
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
+ InvokeArgs, OpBundles, CI->getName(), BB);
+ II->setDebugLoc(CI->getDebugLoc());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakTrackingVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the original call
+ Split->getInstList().pop_front();
+ return Split;
+}
+
+static bool markAliveBlocks(Function &F,
+ SmallPtrSetImpl<BasicBlock*> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ BasicBlock *BB = &F.front();
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
+ do {
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (Instruction &I : *BB) {
+ // Assumptions that are known to be false are equivalent to unreachable.
+ // Also, if the condition is undefined, then we make the choice most
+ // beneficial to the optimizer, and choose that to also be unreachable.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::assume) {
+ if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(II, false);
+ Changed = true;
+ break;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard) {
+ // A call to the guard intrinsic bails out of the current compilation
+ // unit if the predicate passed to it is false. If the predicate is a
+ // constant false, then we know the guard will bail out of the current
+ // compile unconditionally, so all code following it is dead.
+ //
+ // Note: unlike in llvm.assume, it is not "obviously profitable" for
+ // guards to treat `undef` as `false` since a guard on `undef` can
+ // still be useful for widening.
+ if (match(II->getArgOperand(0), m_Zero()))
+ if (!isa<UnreachableInst>(II->getNextNode())) {
+ changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ Value *Callee = CI->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ Changed = true;
+ break;
+ }
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ if (!isa<UnreachableInst>(CI->getNextNode())) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(CI->getNextNode(), false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+      // Store to undef and store to null are undefined behavior and are used
+      // to signal that they should be changed to unreachable by passes that
+      // can't modify the CFG.
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ TerminatorInst *Terminator = BB->getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+          // Just branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
+ // Remove catchpads which cannot be reached.
+ struct CatchPadDenseMapInfo {
+ static CatchPadInst *getEmptyKey() {
+ return DenseMapInfo<CatchPadInst *>::getEmptyKey();
+ }
+ static CatchPadInst *getTombstoneKey() {
+ return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CatchPadInst *CatchPad) {
+ return static_cast<unsigned>(hash_combine_range(
+ CatchPad->value_op_begin(), CatchPad->value_op_end()));
+ }
+ static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+ };
+
+ // Set of unique CatchPads.
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
+ CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
+ HandlerSet;
+ detail::DenseSetEmpty Empty;
+ for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
+ E = CatchSwitch->handler_end();
+ I != E; ++I) {
+ BasicBlock *HandlerBB = *I;
+ auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
+ if (!HandlerSet.insert({CatchPad, Empty}).second) {
+ CatchSwitch->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
+ for (BasicBlock *Successor : successors(BB))
+ if (Reachable.insert(Successor).second)
+ Worklist.push_back(Successor);
+ } while (!Worklist.empty());
+ return Changed;
+}
+
+void llvm::removeUnwindEdge(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ changeToCall(II);
+ return;
+ }
+
+ TerminatorInst *NewTI;
+ BasicBlock *UnwindDest;
+
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ auto *NewCatchSwitch = CatchSwitchInst::Create(
+ CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
+ CatchSwitch->getName(), CatchSwitch);
+ for (BasicBlock *PadBB : CatchSwitch->handlers())
+ NewCatchSwitch->addHandler(PadBB);
+
+ NewTI = NewCatchSwitch;
+ UnwindDest = CatchSwitch->getUnwindDest();
+ } else {
+ llvm_unreachable("Could not find unwind successor");
+ }
+
+ NewTI->takeName(TI);
+ NewTI->setDebugLoc(TI->getDebugLoc());
+ UnwindDest->removePredecessor(BB);
+ TI->replaceAllUsesWith(NewTI);
+ TI->eraseFromParent();
+}
+
+/// removeUnreachableBlocks - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
+/// after modifying the CFG.
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
+ SmallPtrSet<BasicBlock*, 16> Reachable;
+ bool Changed = markAliveBlocks(F, Reachable);
+
+  // If every block is reachable, there is nothing to do.
+  if (Reachable.size() == F.size())
+    return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumRemoved += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(&*BB))
+ continue;
+
+ for (BasicBlock *Successor : successors(&*BB))
+ if (Reachable.count(Successor))
+ Successor->removePredecessor(&*BB);
+ if (LVI)
+ LVI->eraseBlock(&*BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(&*I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+void llvm::combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ K->dropUnknownNonDebugMetadata(KnownIDs);
+ K->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (const auto &MD : Metadata) {
+ unsigned Kind = MD.first;
+ MDNode *JMD = J->getMetadata(Kind);
+ MDNode *KMD = MD.second;
+
+ switch (Kind) {
+ default:
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_dbg:
+ llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
+ case LLVMContext::MD_tbaa:
+ K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
+ break;
+ case LLVMContext::MD_alias_scope:
+ K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
+ break;
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
+ break;
+ case LLVMContext::MD_range:
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+ break;
+ case LLVMContext::MD_fpmath:
+ K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
+ break;
+ case LLVMContext::MD_invariant_load:
+ // Only set the !invariant.load if it is present in both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_nonnull:
+ // Only set the !nonnull if it is present in both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_invariant_group:
+ // Preserve !invariant.group in K.
+ break;
+ case LLVMContext::MD_align:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ }
+ }
+  // Set !invariant.group from J if J has it. If both instructions have it
+  // then we will just pick it from J - even when they are different.
+  // Also make sure that K is a load or store - e.g. combining a bitcast with a
+  // load could produce a bitcast with invariant.group metadata, which is
+  // invalid.
+  // FIXME: we should try to preserve both invariant.group MDs if they are
+  // different, but right now an instruction can only have one invariant.group.
+ if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
+ if (isa<LoadInst>(K) || isa<StoreInst>(K))
+ K->setMetadata(LLVMContext::MD_invariant_group, JMD);
+}
+
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
+ combineMetadata(K, J, KnownIDs);
+}
+
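+// Replace all uses of From that the Dominates predicate accepts with To,
+// returning the number of uses rewritten. A sketch of typical use: after a
+// pass proves "%p == null" along a CFG edge, it can rewrite the uses of %p
+// dominated by that edge to the null constant.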
+template <typename RootType, typename DominatesFn>
+static unsigned replaceDominatedUsesWith(Value *From, Value *To,
+ const RootType &Root,
+ const DominatesFn &Dominates) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ if (!Dominates(Root, U))
+ continue;
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ return Count;
+}
+
+unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
+ assert(From->getType() == To->getType());
+ auto *BB = From->getParent();
+ unsigned Count = 0;
+
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *I = cast<Instruction>(U.getUser());
+ if (I->getParent() == BB)
+ continue;
+ U.set(To);
+ ++Count;
+ }
+ return Count;
+}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlockEdge &Root) {
+ auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
+ return DT.dominates(Root, U);
+ };
+ return ::replaceDominatedUsesWith(From, To, Root, Dominates);
+}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlock *BB) {
+ auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) {
+ auto *I = cast<Instruction>(U.getUser())->getParent();
+ return DT.properlyDominates(BB, I);
+ };
+ return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
+}
+
+bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
+ // Check if the function is specifically marked as a gc leaf function.
+ if (CS.hasFnAttr("gc-leaf-function"))
+ return true;
+ if (const Function *F = CS.getCalledFunction()) {
+ if (F->hasFnAttribute("gc-leaf-function"))
+ return true;
+
+ if (auto IID = F->getIntrinsicID())
+ // Most LLVM intrinsics do not take safepoints.
+ return IID != Intrinsic::experimental_gc_statepoint &&
+ IID != Intrinsic::experimental_deoptimize;
+ }
+
+ return false;
+}
+
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+ LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+ return;
+ }
+
+ // The only other translation we can do is to integral loads with !range
+ // metadata.
+ if (!NewTy->isIntegerTy())
+ return;
+
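+  // Translate !nonnull into the wrapped range [1, 0), which excludes only
+  // zero (assuming the usual null-is-zero pointer representation); e.g. an
+  // i64 load of a nonnull pointer ends up carrying !range !{i64 1, i64 0}
+  // (an illustrative width).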
+ MDBuilder MDB(NewLI.getContext());
+ const Value *Ptr = OldLI.getPointerOperand();
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLI.setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+ MDNode *N, LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+  // Give up unless the new type is a pointer; that is the single, very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+ if (!NewTy->isPointerTy())
+ return;
+
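+  // For example (illustrative), !range !{i64 1, i64 0} on the old integer load
+  // proves the loaded value is never zero, so the new pointer load can be
+  // marked !nonnull.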
+ unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ MDNode *NN = MDNode::get(OldLI.getContext(), None);
+ NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+ }
+}
+
+namespace {
+/// A potential constituent of a bitreverse or bswap expression. See
+/// collectBitParts for a fuller explanation.
+struct BitPart {
+ BitPart(Value *P, unsigned BW) : Provider(P) {
+ Provenance.resize(BW);
+ }
+
+ /// The Value that this is a bitreverse/bswap of.
+ Value *Provider;
+ /// The "provenance" of each bit. Provenance[A] = B means that bit A
+ /// in Provider becomes bit B in the result of this expression.
+ SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
+
+ enum { Unset = -1 };
+};
+} // end anonymous namespace
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// bitnumber to bitnumber. It is the caller's responsibility to validate that
+/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted and a
+/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
+/// [0-7].
+///
+/// To avoid revisiting values, the BitPart results are memoized into the
+/// provided map. To avoid unnecessary copying of BitParts, BitParts are
+/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
+/// store BitParts objects, not pointers. As we need the concept of a nullptr
+/// BitPart (the Value has been analyzed and the analysis failed), we use an
+/// Optional type instead to provide the same functionality.
+///
+/// Because we pass around references into \c BPS, we must use a container that
+/// does not invalidate internal references (std::map instead of DenseMap).
+///
+static const Optional<BitPart> &
+collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
+ std::map<Value *, Optional<BitPart>> &BPS) {
+ auto I = BPS.find(V);
+ if (I != BPS.end())
+ return I->second;
+
+ auto &Result = BPS[V] = None;
+ auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!A || !B)
+ return Result;
+
+ // Try and merge the two together.
+ if (!A->Provider || A->Provider != B->Provider)
+ return Result;
+
+ Result = BitPart(A->Provider, BitWidth);
+ for (unsigned i = 0; i < A->Provenance.size(); ++i) {
+ if (A->Provenance[i] != BitPart::Unset &&
+ B->Provenance[i] != BitPart::Unset &&
+ A->Provenance[i] != B->Provenance[i])
+ return Result = None;
+
+ if (A->Provenance[i] == BitPart::Unset)
+ Result->Provenance[i] = B->Provenance[i];
+ else
+ Result->Provenance[i] = A->Provenance[i];
+ }
+
+ return Result;
+ }
+
+ // If this is a logical shift by a constant, recurse then shift the result.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned BitShift =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined.
+ if (BitShift > BitWidth)
+ return Result;
+
+ auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!Res)
+ return Result;
+ Result = Res;
+
+ // Perform the "shift" on BitProvenance.
+ auto &P = Result->Provenance;
+ if (I->getOpcode() == Instruction::Shl) {
+ P.erase(std::prev(P.end(), BitShift), P.end());
+ P.insert(P.begin(), BitShift, BitPart::Unset);
+ } else {
+ P.erase(P.begin(), std::next(P.begin(), BitShift));
+ P.insert(P.end(), BitShift, BitPart::Unset);
+ }
+
+ return Result;
+ }
+
+ // If this is a logical 'and' with a mask that clears bits, recurse then
+ // unset the appropriate bits.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ // Check that the mask allows a multiple of 8 bits for a bswap, for an
+ // early exit.
+ unsigned NumMaskedBits = AndMask.countPopulation();
+ if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+ return Result;
+
+ auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!Res)
+ return Result;
+ Result = Res;
+
+ for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
+ // If the AndMask is zero for this bit, clear the bit.
+ if ((AndMask & Bit) == 0)
+ Result->Provenance[i] = BitPart::Unset;
+ return Result;
+ }
+
+ // If this is a zext instruction zero extend the result.
+ if (I->getOpcode() == Instruction::ZExt) {
+ auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ auto NarrowBitWidth =
+ cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth();
+ for (unsigned i = 0; i < NarrowBitWidth; ++i)
+ Result->Provenance[i] = Res->Provenance[i];
+ for (unsigned i = NarrowBitWidth; i < BitWidth; ++i)
+ Result->Provenance[i] = BitPart::Unset;
+ return Result;
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap/bitreverse.
+ Result = BitPart(V, BitWidth);
+ for (unsigned i = 0; i < BitWidth; ++i)
+ Result->Provenance[i] = i;
+ return Result;
+}
+
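+/// Check whether moving bit From to bit To matches what a bswap would do.
+/// A worked example for illustration: with BitWidth == 32, From == 5 (bit 5 of
+/// byte 0) must map to To == 29 (bit 5 of byte 3), since 5 % 8 == 29 % 8 and
+/// byte 0 must land in byte 3 (= 4 - 0 - 1).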
+static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ if (From % 8 != To % 8)
+ return false;
+ // Convert from bit indices to byte indices and check for a byte reversal.
+ From >>= 3;
+ To >>= 3;
+ BitWidth >>= 3;
+ return From == BitWidth - To - 1;
+}
+
+static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
+ unsigned BitWidth) {
+ return From == BitWidth - To - 1;
+}
+
+/// Given an OR instruction, check to see if this is a bswap or bitreverse
+/// idiom. If so, insert the new intrinsic and return true.
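+///
+/// For illustration, a minimal i16 bswap idiom this can match (value names
+/// are made up):
+///   %hi = shl i16 %x, 8
+///   %lo = lshr i16 %x, 8
+///   %r  = or i16 %hi, %lo   ; rewritten to call i16 @llvm.bswap.i16(i16 %x)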
+bool llvm::recognizeBSwapOrBitReverseIdiom(
+ Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
+ SmallVectorImpl<Instruction *> &InsertedInsts) {
+ if (Operator::getOpcode(I) != Instruction::Or)
+ return false;
+ if (!MatchBSwaps && !MatchBitReversals)
+ return false;
+ IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
+ if (!ITy || ITy->getBitWidth() > 128)
+ return false; // Can't do vectors or integers > 128 bits.
+ unsigned BW = ITy->getBitWidth();
+
+ unsigned DemandedBW = BW;
+ IntegerType *DemandedTy = ITy;
+ if (I->hasOneUse()) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) {
+ DemandedTy = cast<IntegerType>(Trunc->getType());
+ DemandedBW = DemandedTy->getBitWidth();
+ }
+ }
+
+ // Try to find all the pieces corresponding to the bswap.
+ std::map<Value *, Optional<BitPart>> BPS;
+ auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
+ if (!Res)
+ return false;
+ auto &BitProvenance = Res->Provenance;
+
+ // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+ // only byteswap values with an even number of bytes.
+ bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true;
+ for (unsigned i = 0; i < DemandedBW; ++i) {
+ OKForBSwap &=
+ bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW);
+ OKForBitReverse &=
+ bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW);
+ }
+
+ Intrinsic::ID Intrin;
+ if (OKForBSwap && MatchBSwaps)
+ Intrin = Intrinsic::bswap;
+ else if (OKForBitReverse && MatchBitReversals)
+ Intrin = Intrinsic::bitreverse;
+ else
+ return false;
+
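+  // If only a truncated part of the result is demanded - e.g. an i32
+  // expression whose sole use is a trunc to i16 (widths chosen for
+  // illustration) - build the intrinsic at the narrow type and zext the call
+  // back to the original type.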
+ if (ITy != DemandedTy) {
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
+ Value *Provider = Res->Provider;
+ IntegerType *ProviderTy = cast<IntegerType>(Provider->getType());
+ // We may need to truncate the provider.
+ if (DemandedTy != ProviderTy) {
+ auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy,
+ "trunc", I);
+ InsertedInsts.push_back(Trunc);
+ Provider = Trunc;
+ }
+ auto *CI = CallInst::Create(F, Provider, "rev", I);
+ InsertedInsts.push_back(CI);
+ auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
+ InsertedInsts.push_back(ExtInst);
+ return true;
+ }
+
+ Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
+ InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
+ return true;
+}
+
+// CodeGen has special handling for some string functions that may replace
+// them with target-specific intrinsics. Since that'd skip our interceptors
+// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses,
+// we mark affected calls as NoBuiltin, which will disable optimization
+// in CodeGen.
+void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
+ CallInst *CI, const TargetLibraryInfo *TLI) {
+ Function *F = CI->getCalledFunction();
+ LibFunc Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
+ !F->doesNotAccessMemory())
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+}
+
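+// For example (sketch): in a shufflevector the mask is operand 2 and must stay
+// constant, so this returns false for OpIdx == 2 on such instructions.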
+bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+ // We can't have a PHI with a metadata type.
+ if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+ return false;
+
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
+
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ // Can't handle inline asm. Skip it.
+ if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue()))
+ return false;
+    // Many arithmetic intrinsics have no issue taking a
+    // variable, however it's hard to distinguish these from
+    // specials such as @llvm.frameaddress that require a constant.
+ if (isa<IntrinsicInst>(I))
+ return false;
+
+ // Constant bundle operands may need to retain their constant-ness for
+ // correctness.
+ if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+ return false;
+ return true;
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::Switch:
+ case Instruction::ExtractValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::InsertValue:
+ // All operands apart from the first and the second are constant.
+ return OpIdx < 2;
+ case Instruction::Alloca:
+ // Static allocas (constant size in the entry block) are handled by
+ // prologue/epilogue insertion so they're free anyway. We definitely don't
+ // want to make them non-constant.
+    return !cast<AllocaInst>(I)->isStaticAlloca();
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = gep_type_begin(I);
+ for (auto E = std::next(It, OpIdx); It != E; ++It)
+ if (It.isStruct())
+ return false;
+ return true;
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 000000000000..e21e34df8ded
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,877 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header. This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header). This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-simplify"
+
+STATISTIC(NumNested, "Number of nested loops split out");
+
+// If the new block isn't already well placed, move it right after one of the
+// 'outside blocks'. This prevents the preheader from being placed inside the
+// loop body, e.g. when the loop hasn't been rotated.
+static void placeSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock *> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = --NewBB->getIterator();
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
+ }
+
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
+
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = nullptr;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i]->getIterator();
+ if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
+ }
+ }
+
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one. This method has two phases:
+/// preheader insertion and analysis updating.
+///
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Header = L->getHeader();
+
+ // Compute the set of predecessors of the loop that are not in the loop.
+ SmallVector<BasicBlock*, 8> OutsideBlocks;
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(P);
+ }
+ }
+
+ // Split out the loop pre-header.
+  BasicBlock *PreheaderBB = SplitBlockPredecessors(
+      Header, OutsideBlocks, ".preheader", DT, LI, PreserveLCSSA);
+ if (!PreheaderBB)
+ return nullptr;
+
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+
+ return PreheaderBB;
+}
+
+/// Add the specified block, and all of its predecessors, to the specified set,
+/// if it's not already in there. Stop predecessor traversal when we reach
+/// StopBlock.
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+ std::set<BasicBlock*> &Blocks) {
+ SmallVector<BasicBlock *, 8> Worklist;
+ Worklist.push_back(InputBB);
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (Blocks.insert(BB).second && BB != StopBlock)
+      // If BB is not already processed and it is not a stop block, then
+      // insert its predecessors into the worklist.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *WBB = *I;
+ Worklist.push_back(WBB);
+ }
+ } while (!Worklist.empty());
+}
+
+/// \brief The first part of loop-nestification is to find a PHI node that tells
+/// us how to partition the loops.
+static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+ AssumptionCache *AC) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+ PHINode *PN = cast<PHINode>(I);
+ ++I;
+ if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ // This is a degenerate PHI already, don't modify it!
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Scan this PHI node looking for a use of the PHI node by itself.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == PN &&
+ L->contains(PN->getIncomingBlock(i)))
+ // We found something tasty to remove.
+ return PN;
+ }
+ return nullptr;
+}
+
+/// \brief If this loop has multiple backedges, try to pull one of them out into
+/// a nested loop.
+///
+/// This is important for code that looks like
+/// this:
+///
+/// Loop:
+/// ...
+/// br cond, Loop, Next
+/// ...
+/// br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop. PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
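+/// For instance (an illustrative PHI), with
+///   %x = phi [ %x, %backedge1 ], [ %x.next, %backedge2 ], [ 0, %preheader ]
+/// %x is unchanged around %backedge1, so %backedge1 stays as the inner loop's
+/// backedge while %backedge2 and the preheader edge are split out to feed the
+/// new outer loop.
+///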
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, bool PreserveLCSSA,
+ AssumptionCache *AC) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return nullptr;
+
+  // The header is not an EH pad; preheader insertion should ensure this.
+ BasicBlock *Header = L->getHeader();
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+ PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
+ if (!PN) return nullptr; // No known way to partition.
+
+ // Pull out all predecessors that have varying values in the loop. This
+ // handles the case when a PHI node has multiple instances of itself as
+ // arguments.
+ SmallVector<BasicBlock*, 8> OuterLoopPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) != PN ||
+ !L->contains(PN->getIncomingBlock(i))) {
+ // We can't split indirectbr edges.
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ return nullptr;
+ OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+ }
+ }
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+ DT, LI, PreserveLCSSA);
+
+ // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+ // code layout too horribly.
+ placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+ // Create the new outer loop.
+ Loop *NewOuter = new Loop();
+
+ // Change the parent loop to use the outer loop as its child now.
+ if (Loop *Parent = L->getParentLoop())
+ Parent->replaceChildLoopWith(L, NewOuter);
+ else
+ LI->changeTopLevelLoop(L, NewOuter);
+
+ // L is now a subloop of our outer loop.
+ NewOuter->addChildLoop(L);
+
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I)
+ NewOuter->addBlockEntry(*I);
+
+ // Now reset the header in L, which had been moved by
+ // SplitBlockPredecessors for the outer loop.
+ L->moveToHeader(Header);
+
+ // Determine which blocks should stay in L and which should be moved out to
+ // the Outer loop now.
+ std::set<BasicBlock*> BlocksInL;
+  for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI != E;
+       ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ addBlockAndPredsToSet(P, Header, BlocksInL);
+ }
+
+ // Scan all of the loop children of L, moving them to OuterLoop if they are
+ // not part of the inner loop.
+ const std::vector<Loop*> &SubLoops = L->getSubLoops();
+ for (size_t I = 0; I != SubLoops.size(); )
+ if (BlocksInL.count(SubLoops[I]->getHeader()))
+ ++I; // Loop remains in L
+ else
+ NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+ SmallVector<BasicBlock *, 8> OuterLoopBlocks;
+ OuterLoopBlocks.push_back(NewBB);
+ // Now that we know which blocks are in L and which need to be moved to
+ // OuterLoop, move any blocks that need it.
+ for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ if (!BlocksInL.count(BB)) {
+ // Move this block to the parent, updating the exit blocks sets
+ L->removeBlockFromLoop(BB);
+ if ((*LI)[BB] == L) {
+ LI->changeLoopFor(BB, NewOuter);
+ OuterLoopBlocks.push_back(BB);
+ }
+ --i;
+ }
+ }
+
+ // Split edges to exit blocks from the inner loop, if they emerged in the
+ // process of separating the outer one.
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+
+ if (PreserveLCSSA) {
+ // Fix LCSSA form for L. Some values, which previously were only used inside
+ // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+ // in corresponding exit blocks.
+ // We don't need to form LCSSA recursively, because there cannot be uses
+ // inside a newly created loop of defs from inner loops as those would
+ // already be a use of an LCSSA phi node.
+ formLCSSA(*L, *DT, LI, SE);
+
+ assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "LCSSA is broken after separating nested loops!");
+ }
+
+ return NewOuter;
+}
+
+/// \brief This method is called when the specified loop has more than one
+/// backedge in it.
+///
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header. This ensures that loops
+/// have exactly one backedge.
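+///
+/// Sketch of the transform: backedges Latch1->Header and Latch2->Header are
+/// both redirected to a new block "<header>.backedge", which unconditionally
+/// branches to Header, leaving that block's branch as the loop's only
+/// backedge.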
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+ DominatorTree *DT, LoopInfo *LI) {
+ assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+ // Get information about the loop
+ BasicBlock *Header = L->getHeader();
+ Function *F = Header->getParent();
+
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return nullptr;
+
+ // The header is not an EH pad; preheader insertion should ensure this.
+ assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+ // Figure out which basic blocks contain back-edges to the loop header.
+ std::vector<BasicBlock*> BackedgeBlocks;
+  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E;
+       ++I) {
+ BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return nullptr;
+
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
+
+ // Create and insert the new backedge block...
+ BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+ Header->getName() + ".backedge", F);
+ BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+ BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
+
+ // Move the new backedge block to right after the last backedge block.
+ Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
+ F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+ // Now that the block has been inserted into the function, create PHI nodes in
+ // the backedge block which correspond to any PHI nodes in the header block.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+ PN->getName()+".be", BETerminator);
+
+ // Loop over the PHI node, moving all entries except the one for the
+ // preheader over to the new PHI node.
+ unsigned PreheaderIdx = ~0U;
+ bool HasUniqueIncomingValue = true;
+ Value *UniqueValue = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *IBB = PN->getIncomingBlock(i);
+ Value *IV = PN->getIncomingValue(i);
+ if (IBB == Preheader) {
+ PreheaderIdx = i;
+ } else {
+ NewPN->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (!UniqueValue)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Delete all of the incoming values from the old PN except the preheader's
+ assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+ if (PreheaderIdx != 0) {
+ PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+ PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+ }
+ // Nuke all entries except the zero'th.
+ for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+ PN->removeIncomingValue(e-i, false);
+
+ // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+ PN->addIncoming(NewPN, BEBlock);
+
+ // As an optimization, if all incoming values in the new PhiNode (which is a
+ // subset of the incoming values of the old PHI node) have the same value,
+ // eliminate the PHI Node.
+ if (HasUniqueIncomingValue) {
+ NewPN->replaceAllUsesWith(UniqueValue);
+ BEBlock->getInstList().erase(NewPN);
+ }
+ }
+
+ // Now that all of the PHI nodes have been inserted and adjusted, modify the
+ // backedge blocks to jump to the BEBlock instead of the header.
+ // If one of the backedges has llvm.loop metadata attached, we remove
+ // it from the backedge and add it to BEBlock.
+ unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
+ MDNode *LoopMD = nullptr;
+ for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+ TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ if (!LoopMD)
+ LoopMD = TI->getMetadata(LoopMDKind);
+ TI->setMetadata(LoopMDKind, nullptr);
+ for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+ if (TI->getSuccessor(Op) == Header)
+ TI->setSuccessor(Op, BEBlock);
+ }
+ BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+
+ //===--- Update all analyses which we must preserve now -----------------===//
+
+ // Update Loop Information - we know that this block is now in the current
+ // loop and all parent loops.
+ L->addBasicBlockToLoop(BEBlock, *LI);
+
+ // Update dominator information
+ DT->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+/// \brief Simplify one loop and queue further loops for simplification.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+ DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (BasicBlock *P : BadPreds) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << P->getName() << "\n");
+
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = P->getTerminator();
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBlock : ExitingBlocks)
+ if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << ExitingBlock->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ if (Preheader)
+ Changed = true;
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+ Changed = true;
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (Loop *OuterL =
+ separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
+ ++NumNested;
+ // Enqueue the outer loop as it should be processed next in our
+ // depth-first nest walk.
+ Worklist.push_back(OuterL);
+
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+        // GCC doesn't perform tail-recursion elimination here.
+        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
+ if (LoopLatch)
+ Changed = true;
+ }
+
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ if (SE) SE->forgetValue(PN);
+ if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ auto HasUniqueExitBlock = [&]() {
+ BasicBlock *UniqueExit = nullptr;
+ for (auto *ExitingBB : ExitingBlocks)
+ for (auto *SuccBB : successors(ExitingBB)) {
+ if (L->contains(SuccBB))
+ continue;
+
+ if (!UniqueExit)
+ UniqueExit = SuccBB;
+ else if (UniqueExit != SuccBB)
+ return false;
+ }
+
+ return true;
+ };
+ if (HasUniqueExitBlock()) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ bool AnyInvariant = false;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = &*I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator()
+ : nullptr)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (AnyInvariant) {
+ Changed = true;
+ // The loop disposition of all SCEV expressions that depend on any
+ // hoisted values have also changed.
+ if (SE)
+ SE->forgetLoopDispositions(L);
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI))
+ continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+      // Notify ScalarEvolution before deleting this block. Currently we assume
+      // the parent loop doesn't change (splitting edges doesn't count). If
+      // blocks, CFG edges, or other values in the parent loop change, then we
+      // need to call forgetLoop() for the parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(
+ ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ BI->getSuccessor(1)->removePredecessor(
+ ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, AssumptionCache *AC,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+
+#ifndef NDEBUG
+ // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+ // form.
+ if (PreserveLCSSA) {
+ assert(DT && "DT not available.");
+ assert(LI && "LI not available.");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Requested to preserve LCSSA, but it's already broken.");
+ }
+#endif
+
+ // Worklist maintains our depth-first queue of loops in this nest to process.
+ SmallVector<Loop *, 4> Worklist;
+ Worklist.push_back(L);
+
+ // Walk the worklist from front to back, pushing newly found sub loops onto
+ // the back. This will let us process loops from back to front in depth-first
+ // order. We can use this simple process because loops form a tree.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ Loop *L2 = Worklist[Idx];
+ Worklist.append(L2->begin(), L2->end());
+ }
+
+ while (!Worklist.empty())
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+ AC, PreserveLCSSA);
+
+ return Changed;
+}
+
+namespace {
+ struct LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<DependenceAnalysisWrapperPass>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const override;
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+ bool Changed = false;
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ // Simplify each loop nest in the function.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
+
+#ifndef NDEBUG
+ if (PreserveLCSSA) {
+ bool InLCSSA = all_of(
+ *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
+ assert(InLCSSA && "LCSSA is broken after loop-simplify.");
+ }
+#endif
+ return Changed;
+}
+
+PreservedAnalyses LoopSimplifyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = false;
+ LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+
+ // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
+ // after simplifying the loops.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<BasicAA>();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<SCEVAA>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DependenceAnalysis>();
+ return PA;
+}
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+ // Verify subloops.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ verifyLoop(*I);
+
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+ // that there is an indirectbr near any time there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred;
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ }
+
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting;
+ }
+}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+ // FIXME: This routine is being called mid-way through the loop pass manager
+ // as loop passes destroy this analysis. That's actually fine, but we have no
+ // way of expressing that here. Once all of the passes that destroy this are
+ // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(*I);
+#endif
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
new file mode 100644
index 000000000000..f2527f89e83e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -0,0 +1,871 @@
+//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+// The process of unrolling can produce extraneous basic blocks linked with
+// unconditional branches. This will be corrected in the future.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+
+// TODO: Should these be here or in LoopUnroll?
+STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+
+static cl::opt<bool>
+UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolled loops to be unrolled "
+ "with epilog instead of prolog."));
+
+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+ cl::desc("Verify domtree after unrolling"),
+#ifdef NDEBUG
+ cl::init(false)
+#else
+ cl::init(true)
+#endif
+ );
+
+/// Convert the instruction operands from referencing the current values into
+/// those specified by VMap.
+static inline void remapInstruction(Instruction *I,
+ ValueToValueMapTy &VMap) {
+ for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+ Value *Op = I->getOperand(op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
+ if (It != VMap.end())
+ I->setOperand(op, It->second);
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
+}
+
+/// Folds a basic block into its predecessor if it only has one predecessor, and
+/// that predecessor only has one successor.
+/// The LoopInfo Analysis that is passed will be kept consistent. If folding is
+/// successful references to the containing loop must be removed from
+/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have
+/// references to the eliminated BB. The argument ForgottenLoops contains a set
+/// of loops that have already been forgotten to prevent redundant, expensive
+/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
+static BasicBlock *
+foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
+ SmallPtrSetImpl<Loop *> &ForgottenLoops,
+ DominatorTree *DT) {
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ BasicBlock *OnlyPred = BB->getSinglePredecessor();
+ if (!OnlyPred) return nullptr;
+
+ if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
+ return nullptr;
+
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
+
+ // Resolve any PHI nodes at the start of the block. They are all
+ // guaranteed to have exactly one entry if they exist, unless there are
+ // multiple duplicate (but guaranteed to be equal) entries for the
+ // incoming edges. This occurs when there are multiple edges from
+ // OnlyPred to OnlySucc.
+ FoldSingleEntryPHINodes(BB);
+
+ // Delete the unconditional branch from the predecessor...
+ OnlyPred->getInstList().pop_back();
+
+ // Make all PHI nodes that referred to BB now refer to Pred as their
+ // source...
+ BB->replaceAllUsesWith(OnlyPred);
+
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+ // OldName will be valid until erased.
+ StringRef OldName = BB->getName();
+
+ // Erase the old block and update dominator info.
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(OnlyPred);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (auto *DI : Children)
+ DT->changeImmediateDominator(DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
+
+ // ScalarEvolution holds references to loop exit blocks.
+ if (SE) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (ForgottenLoops.insert(L).second)
+ SE->forgetLoop(L);
+ }
+ }
+ LI->removeBlock(BB);
+
+ // Inherit predecessor's name if it exists...
+ if (!OldName.empty() && !OnlyPred->hasName())
+ OnlyPred->setName(OldName);
+
+ BB->eraseFromParent();
+
+ return OnlyPred;
+}
+
+/// Check if unrolling created a situation where we need to insert phi nodes to
+/// preserve LCSSA form.
+/// \param Blocks is a vector of basic blocks representing unrolled loop.
+/// \param L is the outer loop.
+/// It's possible that some of the blocks are in L, and some are not. In this
+/// case, if there is a use outside L and the definition is inside L, we need
+/// to insert a phi-node, otherwise LCSSA will be broken.
+/// The function is just a helper function for llvm::UnrollLoop that returns
+/// true if this situation occurs, indicating that LCSSA needs to be fixed.
+static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
+ LoopInfo *LI) {
+ for (BasicBlock *BB : Blocks) {
+ if (LI->getLoopFor(BB) == L)
+ continue;
+ for (Instruction &I : *BB) {
+ for (Use &U : I.operands()) {
+ if (auto Def = dyn_cast<Instruction>(U)) {
+ Loop *DefLoop = LI->getLoopFor(Def->getParent());
+ if (!DefLoop)
+ continue;
+ if (DefLoop->contains(L))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
+/// and adds a mapping from the original loop to the new loop to NewLoops.
+/// Returns nullptr if no new loop was created, and otherwise a pointer to the
+/// original loop that OriginalBB was part of.
+const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
+ BasicBlock *ClonedBB, LoopInfo *LI,
+ NewLoopsMap &NewLoops) {
+ // Figure out which loop New is in.
+ const Loop *OldLoop = LI->getLoopFor(OriginalBB);
+ assert(OldLoop && "Should (at least) be in the loop being unrolled!");
+
+ Loop *&NewLoop = NewLoops[OldLoop];
+ if (!NewLoop) {
+ // Found a new sub-loop.
+ assert(OriginalBB == OldLoop->getHeader() &&
+ "Header should be first in RPO");
+
+ NewLoop = new Loop();
+ Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
+
+ if (NewLoopParent)
+ NewLoopParent->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return OldLoop;
+ } else {
+ NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
+ return nullptr;
+ }
+}
+
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant. In this case epilog unrolling leaves the PHI starting from a
+/// constant, but prolog unrolling converts it to a non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (Instruction &BBI : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ if (!PN)
+ break;
+ if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
+/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
+/// can only fail when the loop's latch block is not terminated by a conditional
+/// branch instruction. However, if the trip count (and multiple) are not known,
+/// loop unrolling will usually just produce more code that runs no faster.
+///
+/// TripCount is the upper bound of the iteration on which control exits
+/// LatchBlock. Control may exit the loop prior to TripCount iterations either
+/// via an early branch in other loop block or via LatchBlock terminator. This
+/// is relaxed from the general definition of trip count which is the number of
+/// times the loop header executes. Note that UnrollLoop assumes that the loop
+/// counter test is in LatchBlock in order to remove unnecessary instances of
+/// the test. If control can exit the loop from the LatchBlock's terminator
+/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
+///
+/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
+/// needs to be preserved. It is needed when we use trip count upper bound to
+/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
+/// conditional branch needs to be preserved.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
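+///
+/// For example (illustrative): with an unknown TripCount and TripMultiple ==
+/// 4, unrolling by Count == 4 lets every latch branch except the final one
+/// become unconditional, while Count == 8 must keep a conditional exit every
+/// four copies, since only a multiple of four iterations is guaranteed.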
+///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
+/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
+/// If we want to perform PGO-based loop peeling, PeelCount is set to the
+/// number of iterations we want to peel off.
+///
+/// The LoopInfo Analysis that is passed will be kept consistent.
+///
+/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
+/// DominatorTree if they are non-null.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
+ bool AllowRuntime, bool AllowExpensiveTripCount,
+ bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return false;
+ }
+
+ // The current loop unroll pass can only unroll loops with a single latch
+ // that's a conditional branch exiting the loop.
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
+ BasicBlock *Header = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+ if (!BI || BI->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Can't unroll; loop not terminated by a conditional branch.\n");
+ return false;
+ }
+
+ auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+ };
+
+ if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+ DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+ " exiting the loop can be unrolled\n");
+ return false;
+ }
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
+
+ if (TripCount != 0)
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
+ if (TripMultiple != 1)
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+
+ // Effectively "DCE" unrolled iterations that are beyond the tripcount
+ // and will never be executed.
+ if (TripCount != 0 && Count > TripCount)
+ Count = TripCount;
+
+ // Don't enter the unroll code if there is nothing to do.
+ if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+ DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ return false;
+ }
+
+ assert(Count > 0);
+ assert(TripMultiple > 0);
+ assert(TripCount == 0 || TripCount % TripMultiple == 0);
+
+ // Are we eliminating the loop control altogether?
+ bool CompletelyUnroll = Count == TripCount;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
+
+ // Go through all exits of L and see if there are any phi-nodes there. We just
+ // conservatively assume that they're inserted to preserve LCSSA form, which
+ // means that complete unrolling might break this form. We need to either fix
+ // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+ // now we just recompute LCSSA for the outer loop, but it should be possible
+ // to fix it in-place.
+ bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
+ any_of(ExitBlocks, [](const BasicBlock *BB) {
+ return isa<PHINode>(BB->begin());
+ });
+
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ assert((!RuntimeTripCount || !PeelCount) &&
+ "Did not expect runtime trip-count unrolling "
+ "and peeling for the same loop");
+
+ if (PeelCount)
+ peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+
+ // Loops containing convergent instructions must have a count that divides
+ // their TripMultiple.
+ DEBUG(
+ {
+ bool HasConvergent = false;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto CS = CallSite(&I))
+ HasConvergent |= CS.isConvergent();
+ assert((!HasConvergent || TripMultiple % Count == 0) &&
+ "Unroll count must divide trip multiple if loop contains a "
+ "convergent operation.");
+ });
+
+ bool EpilogProfitability =
+ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+ : isEpilogProfitable(L);
+
+ if (RuntimeTripCount && TripMultiple % Count != 0 &&
+ !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
+ EpilogProfitability, LI, SE, DT,
+ PreserveLCSSA)) {
+ if (Force)
+ RuntimeTripCount = false;
+ else {
+      DEBUG(
+          dbgs() << "Won't unroll; remainder loop could not be generated "
+                    "when assuming runtime trip count\n");
+ return false;
+ }
+ }
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (SE)
+ SE->forgetLoop(L);
+
+ // If we know the trip count, we know the multiple...
+ unsigned BreakoutTrip = 0;
+ if (TripCount != 0) {
+ BreakoutTrip = TripCount % Count;
+ TripMultiple = 0;
+ } else {
+ // Figure out what multiple to use.
+ BreakoutTrip = TripMultiple =
+ (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ }
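+  // Illustrative numbers: TripCount == 10 with Count == 4 gives BreakoutTrip
+  // == 2, so the exit test only has to stay at the trip where the loop can
+  // actually leave; with an unknown TripCount, Count == 4 and TripMultiple ==
+  // 6 give BreakoutTrip == TripMultiple == gcd(4, 6) == 2.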
+
+ using namespace ore;
+ // Report the unrolling decision.
+ if (CompletelyUnroll) {
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ << " with trip count " << TripCount << "!\n");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unrolled loop with "
+ << NV("UnrollCount", TripCount) << " iterations");
+ } else if (PeelCount) {
+ DEBUG(dbgs() << "PEELING loop %" << Header->getName()
+ << " with iteration count " << PeelCount << "!\n");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+ L->getHeader())
+ << " peeled loop by " << NV("PeelCount", PeelCount)
+ << " iterations");
+ } else {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
+
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
+ << " by " << Count);
+ if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ ORE->emit(Diag << " with a breakout at trip "
+ << NV("BreakoutTrip", BreakoutTrip));
+ } else if (TripMultiple != 1) {
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple)
+ << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
+ ORE->emit(Diag << " with run-time trip count");
+ }
+ DEBUG(dbgs() << "!\n");
+ }
+
+ bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+
+ // For the first iteration of the loop, we should use the precloned values for
+ // PHI nodes. Insert associations now.
+ ValueToValueMapTy LastValueMap;
+ std::vector<PHINode*> OrigPHINode;
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ OrigPHINode.push_back(cast<PHINode>(I));
+ }
+
+ std::vector<BasicBlock*> Headers;
+ std::vector<BasicBlock*> Latches;
+ Headers.push_back(Header);
+ Latches.push_back(LatchBlock);
+
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
+ std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
+
+ // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
+ // might break loop-simplified form for these loops (as they, e.g., would
+  // share the same exit blocks). We'll keep track of loops for which we might
+  // break this so that later we can re-simplify them.
+ SmallSetVector<Loop *, 4> LoopsToSimplify;
+ for (Loop *SubLoop : *L)
+ LoopsToSimplify.insert(SubLoop);
+
+ if (Header->getParent()->isDebugInfoForProfiling())
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (const DILocation *DIL = I.getDebugLoc())
+ I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+
+ for (unsigned It = 1; It != Count; ++It) {
+ std::vector<BasicBlock*> NewBlocks;
+ SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+ NewLoops[L] = L;
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
+ Header->getParent()->getBasicBlockList().push_back(New);
+
+ assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
+ "Header should not be in a sub-loop");
+ // Tell LI about New.
+ const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+ if (OldLoop) {
+ LoopsToSimplify.insert(NewLoops[OldLoop]);
+
+ // Forget the old loop, since its inputs may have changed.
+ if (SE)
+ SE->forgetLoop(OldLoop);
+ }
+
+ if (*BB == Header)
+ // Loop over all of the PHI nodes in the block, changing them to use
+ // the incoming values from the previous block.
+ for (PHINode *OrigPHI : OrigPHINode) {
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
+ Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal))
+ if (It > 1 && L->contains(InValI))
+ InVal = LastValueMap[InValI];
+ VMap[OrigPHI] = InVal;
+ New->getInstList().erase(NewPHI);
+ }
+
+ // Update our running map of newest clones
+ LastValueMap[*BB] = New;
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI)
+ LastValueMap[VI->first] = VI->second;
+
+ // Add phi entries for newly created values to all exit blocks.
+ for (BasicBlock *Succ : successors(*BB)) {
+ if (L->contains(Succ))
+ continue;
+ for (BasicBlock::iterator BBI = Succ->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ phi->addIncoming(Incoming, New);
+ }
+ }
+ // Keep track of new headers and latches as we create them, so that
+ // we can insert the proper branches later.
+ if (*BB == Header)
+ Headers.push_back(New);
+ if (*BB == LatchBlock)
+ Latches.push_back(New);
+
+ NewBlocks.push_back(New);
+ UnrolledLoopBlocks.push_back(New);
+
+ // Update DomTree: since we just copy the loop body, and each copy has a
+ // dedicated entry block (copy of the header block), this header's copy
+ // dominates all copied blocks. That means, dominance relations in the
+ // copied body are the same as in the original body.
+ if (DT) {
+ if (*BB == Header)
+ DT->addNewBlock(New, Latches[It - 1]);
+ else {
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
+ }
+ }
+ }
+
+ // Remap all instructions in the most recent iteration
+ for (BasicBlock *NewBlock : NewBlocks) {
+ for (Instruction &I : *NewBlock) {
+ ::remapInstruction(&I, LastValueMap);
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
+ }
+
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (PHINode *PN : OrigPHINode) {
+ if (CompletelyUnroll) {
+ PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
+ Header->getInstList().erase(PN);
+ }
+ else if (Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
+ }
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
+      // If using the trip count upper bound to completely unroll, we need to
+      // keep the conditional branches except for the last one, because the
+      // loop may exit after any iteration.
+      assert(NeedConditional &&
+             "NeedConditional cannot be modified by both complete "
+             "unrolling and runtime unrolling");
+ NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
+ } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+ }
+
+ if (NeedConditional) {
+ // Update the conditional branch's successor for the following
+ // iteration.
+ Term->setSuccessor(!ContinueOnTrue, Dest);
+ } else {
+ // Remove phi operands at this loop exit
+ if (Dest != LoopExit) {
+ BasicBlock *BB = Latches[i];
+ for (BasicBlock *Succ: successors(BB)) {
+ if (Succ == Headers[i])
+ continue;
+ for (BasicBlock::iterator BBI = Succ->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Phi->removeIncomingValue(BB, false);
+ }
+ }
+ }
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+ }
+ }
+
+ // Update dominators of blocks we might reach through exits.
+  // The immediate dominator of such a block might change, because we add more
+  // routes that can lead to the exit: it can now be reached from the copied
+  // iterations too.
+ if (DT && Count > 1) {
+ for (auto *BB : OriginalLoopBlocks) {
+ auto *BBDomNode = DT->getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->getChildren()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ BasicBlock *NewIDom;
+ if (BB == LatchBlock) {
+ // The latch is special because we emit unconditional branches in
+ // some cases where the original loop contained a conditional branch.
+ // Since the latch is always at the bottom of the loop, if the latch
+ // dominated an exit before unrolling, the new dominator of that exit
+ // must also be a latch. Specifically, the dominator is the first
+ // latch which ends in a conditional branch, or the last latch if
+ // there is no such latch.
+ NewIDom = Latches.back();
+ for (BasicBlock *IterLatch : Latches) {
+ TerminatorInst *Term = IterLatch->getTerminator();
+ if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
+ NewIDom = IterLatch;
+ break;
+ }
+ }
+ } else {
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
+ }
+ for (auto *ChildBB : ChildrenToUpdate)
+ DT->changeImmediateDominator(ChildBB, NewIDom);
+ }
+ }
+
+ if (DT && UnrollVerifyDomtree)
+ DT->verifyDomTree();
+
+ // Merge adjacent basic blocks, if possible.
+ SmallPtrSet<Loop *, 4> ForgottenLoops;
+ for (BasicBlock *Latch : Latches) {
+ BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold =
+ foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) {
+ // Dest has been folded into Fold. Update our worklists accordingly.
+ std::replace(Latches.begin(), Latches.end(), Dest, Fold);
+ UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
+ UnrolledLoopBlocks.end(), Dest),
+ UnrolledLoopBlocks.end());
+ }
+ }
+ }
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && !CompletelyUnroll && Count > 1) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+
+ // At this point, the code is well formed. We now do a quick sweep over the
+ // inserted code, doing constant propagation and dead code elimination as we
+ // go.
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
+ for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(Inst, V))
+ Inst->replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(Inst))
+ BB->getInstList().erase(Inst);
+ }
+ }
+
+ // TODO: after peeling or unrolling, previously loop variant conditions are
+  // likely to fold to constants; eagerly propagating those here will require
+ // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
+ // appropriate.
+
+ NumCompletelyUnrolled += CompletelyUnroll;
+ ++NumUnrolled;
+
+ Loop *OuterL = L->getParentLoop();
+ // Update LoopInfo if the loop is completely removed.
+ if (CompletelyUnroll)
+ LI->markAsRemoved(L);
+
+ // After complete unrolling most of the blocks should be contained in OuterL.
+ // However, some of them might happen to be out of OuterL (e.g. if they
+ // precede a loop exit). In this case we might need to insert PHI nodes in
+ // order to preserve LCSSA form.
+ // We don't need to check this if we already know that we need to fix LCSSA
+ // form.
+ // TODO: For now we just recompute LCSSA for the outer loop in this case, but
+ // it should be possible to fix it in-place.
+ if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
+ NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
+
+ // If we have a pass and a DominatorTree we should re-simplify impacted loops
+ // to ensure subsequent analyses can rely on this form. We want to simplify
+ // at least one layer outside of the loop that was unrolled so that any
+ // changes to the parent loop exposed by the unrolling are considered.
+ if (DT) {
+ if (OuterL) {
+ // OuterL includes all loops for which we can break loop-simplify, so
+ // it's sufficient to simplify only it (it'll recursively simplify inner
+ // loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by markAsRemoved.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
+ }
+
+ // TODO: That potentially might be compile-time expensive. We should try
+ // to fix the loop-simplified form incrementally.
+ simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
+ } else {
+ // Simplify loops for which we might've broken loop-simplify form.
+ for (Loop *SubLoop : LoopsToSimplify)
+ simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
+ }
+ }
+
+ return true;
+}
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
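+///
+/// For example (illustrative), given loop-id metadata such as
+///   !0 = distinct !{!0, !1}
+///   !1 = !{!"llvm.loop.unroll.count", i32 4}
+/// a query for "llvm.loop.unroll.count" on !0 returns !1.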
+MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
new file mode 100644
index 000000000000..5c21490793e7
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -0,0 +1,554 @@
+//===-- LoopUnrollPeel.cpp - Loop peeling utilities -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for peeling loops
+// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
+// unrolling loops with compile-time constant trip counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+STATISTIC(NumPeeled, "Number of loops peeled");
+
+static cl::opt<unsigned> UnrollPeelMaxCount(
+ "unroll-peel-max-count", cl::init(7), cl::Hidden,
+ cl::desc("Max average trip count which will cause loop peeling."));
+
+static cl::opt<unsigned> UnrollForcePeelCount(
+ "unroll-force-peel-count", cl::init(0), cl::Hidden,
+ cl::desc("Force a peel count regardless of profiling information."));
+
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. it may only become invariant if the loop is
+// fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
+// Check whether we are capable of peeling this loop.
+static bool canPeel(Loop *L) {
+ // Make sure the loop is in simplified form
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ // Only peel loops that contain a single exit
+ if (!L->getExitingBlock() || !L->getUniqueExitBlock())
+ return false;
+
+ // Don't try to peel loops where the latch is not the exiting block.
+ // This can be an indication of two different things:
+ // 1) The loop is not rotated.
+ // 2) The loop contains irreducible control flow that involves the latch.
+ if (L->getLoopLatch() != L->getExitingBlock())
+ return false;
+
+ return true;
+}
+
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map. The
+// function (abbreviated I below) is defined as follows:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// If %y is a loop invariant, then I(%x) = 1.
+// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+// Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+// looks like:
+// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
+// %y = phi(0, 5),
+// %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+ PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+ SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ assert(Phi->getParent() == L->getHeader() &&
+ "Non-loop Phi should not be checked for turning into invariant.");
+ assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+ // If we already know the answer, take it from the map.
+ auto I = IterationsToInvariance.find(Phi);
+ if (I != IterationsToInvariance.end())
+ return I->second;
+
+ // Otherwise we need to analyze the input from the back edge.
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+  // Place infinity in the map first, to avoid infinite recursion for cyclic
+  // Phis. Such cycles can never stop on an invariant.
+ IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+ unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+ if (L->isLoopInvariant(Input))
+ ToInvariance = 1u;
+ else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+ // Only consider Phis in header block.
+ if (IncPhi->getParent() != L->getHeader())
+ return InfiniteIterationsToInvariance;
+ // If the input becomes an invariant after X iterations, then our Phi
+ // becomes an invariant after X + 1 iterations.
+ unsigned InputToInvariance = calculateIterationsToInvariance(
+ IncPhi, L, BackEdge, IterationsToInvariance);
+ if (InputToInvariance != InfiniteIterationsToInvariance)
+ ToInvariance = InputToInvariance + 1u;
+ }
+
+ // If we found that this Phi lies in an invariant chain, update the map.
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ IterationsToInvariance[Phi] = ToInvariance;
+ return ToInvariance;
+}
+
+// Compute the number of iterations we want to peel off and record it in
+// UP.PeelCount.
+void llvm::computePeelCount(Loop *L, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned &TripCount) {
+ assert(LoopSize > 0 && "Zero loop size is not allowed!");
+ UP.PeelCount = 0;
+ if (!canPeel(L))
+ return;
+
+ // Only try to peel innermost loops.
+ if (!L->empty())
+ return;
+
+ // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+  // iterations of the loop. For this we compute the number of iterations after
+ // which every Phi is guaranteed to become an invariant, and try to peel the
+ // maximum number of iterations among these values, thus turning all those
+ // Phis into invariants.
+ // First, check that we can peel at least one iteration.
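+  // Illustrative numbers: with LoopSize == 30 and UP.Threshold == 150 the
+  // check below passes (2 * 30 <= 150), and the cap computed further down
+  // allows at most min(UnrollPeelMaxCount, 150 / 30 - 1) == 4 peeled
+  // iterations.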
+ if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ // Store the pre-calculated values here.
+ SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+    // Now go through all Phis to calculate the number of iterations they
+ // need to become invariants.
+ unsigned DesiredPeelCount = 0;
+ BasicBlock *BackEdge = L->getLoopLatch();
+ assert(BackEdge && "Loop is not in simplified form?");
+ for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+ PHINode *Phi = cast<PHINode>(&*BI);
+ unsigned ToInvariance = calculateIterationsToInvariance(
+ Phi, L, BackEdge, IterationsToInvariance);
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
+ }
+ if (DesiredPeelCount > 0) {
+      // Respect the limits implied by the loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
+ return;
+ }
+ }
+
+ // Bail if we know the statically calculated trip count.
+  // In this case we prefer partial unrolling instead.
+ if (TripCount)
+ return;
+
+ // If the user provided a peel count, use that.
+ bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+ if (UserPeelCount) {
+ DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+ << " iterations.\n");
+ UP.PeelCount = UnrollForcePeelCount;
+ return;
+ }
+
+ // If we don't know the trip count, but have reason to believe the average
+ // trip count is low, peeling should be beneficial, since we will usually
+ // hit the peeled section.
+ // We only do this in the presence of profile information, since otherwise
+ // our estimates of the trip count are not reliable enough.
+ if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) {
+ Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
+ if (!PeelCount)
+ return;
+
+ DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
+ << "\n");
+
+ if (*PeelCount) {
+ if ((*PeelCount <= UnrollPeelMaxCount) &&
+ (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
+ DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
+ UP.PeelCount = *PeelCount;
+ return;
+ }
+ DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+ DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
+ DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ }
+ }
+
+ return;
+}
+
+/// \brief Update the branch weights of the latch of a peeled-off loop
+/// iteration.
+/// This sets the branch weights for the latch of the recently peeled off loop
+/// iteration correctly.
+/// Our goal is to make sure that:
+/// a) The total weight of all the copies of the loop body is preserved.
+/// b) The total weight of the loop exit is preserved.
+/// c) The body weight is reasonably distributed between the peeled iterations.
+///
+/// \param Header The copy of the header block that belongs to the next
+/// iteration.
+/// \param LatchBR The copy of the latch branch that belongs to this iteration.
+/// \param IterNumber The serial number of the iteration that was just
+/// peeled off.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
+/// iterations that are unaccounted for. As an input, it represents the number
+/// of times we expect to enter the header of the iteration currently being
+/// peeled off. The output is the number of times we expect to enter the
+/// header of the next iteration.
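+///
+/// Illustrative numbers: with AvgIters == 3 and an incoming PeeledHeaderWeight
+/// of 90, iteration 0 gets FallThruWeight == 90 * ((3 - 0) / 3) * 0.9 == 81
+/// and ExitWeight == 9, leaving PeeledHeaderWeight == 81 for the next
+/// iteration.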
+static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ unsigned IterNumber, unsigned AvgIters,
+ uint64_t &PeeledHeaderWeight) {
+
+ // FIXME: Pick a more realistic distribution.
+ // Currently the proportion of weight we assign to the fall-through
+ // side of the branch drops linearly with the iteration number, and we use
+ // a 0.9 fudge factor to make the drop-off less sharp...
+ if (PeeledHeaderWeight) {
+ uint64_t FallThruWeight =
+ PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+ uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+ PeeledHeaderWeight -= ExitWeight;
+
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+ : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+}
+
+/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p
+/// InsertBot.
+/// \param IterNumber The serial number of the iteration currently being
+/// peeled off.
+/// \param Exit The exit block of the original loop.
+/// \param[out] NewBlocks A list of the blocks in the newly created clone.
+/// \param[out] VMap The value map between the loop and the new clone.
+/// \param LoopBlocks A helper for DFS-traversal of the loop.
+/// \param LVMap A value-map that maps instructions from the original loop to
+/// instructions in the last peeled-off iteration.
+static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Exit,
+ SmallVectorImpl<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *ParentLoop = L->getParentLoop();
+
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
+ NewBlocks.push_back(NewBB);
+
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ VMap[*BB] = NewBB;
+
+ // If dominator tree is available, insert nodes to represent cloned blocks.
+ if (DT) {
+ if (Header == *BB)
+ DT->addNewBlock(NewBB, InsertTop);
+ else {
+ DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+ // VMap must contain entry for IDom, as the iteration order is RPO.
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+ }
+ }
+ }
+
+ // Hook-up the control flow for the newly inserted blocks.
+ // The new header is hooked up directly to the "top", which is either
+ // the original loop preheader (for the first iteration) or the previous
+ // iteration's exiting block (for every other iteration)
+ InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
+
+ // Similarly, for the latch:
+ // The original exiting edge is still hooked up to the loop exit.
+ // The backedge now goes to the "bottom", which is either the loop's real
+ // header (for the last peeled iteration) or the copied header of the next
+ // iteration (for every other iteration)
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ LatchBR->setSuccessor(HeaderIdx, InsertBot);
+ LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ if (DT)
+ DT->changeImmediateDominator(InsertBot, NewLatch);
+
+ // The new copy of the loop body starts with a bunch of PHI nodes
+ // that pick an incoming value from either the preheader, or the previous
+ // loop iteration. Since this copy is no longer part of the loop, we
+ // resolve this statically:
+ // For the first iteration, we use the value from the preheader directly.
+ // For any other iteration, we replace the phi with the value generated by
+ // the immediately preceding clone of the loop body (which represents
+ // the previous iteration).
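+  // Illustrative example: for a header phi
+  //   %i = phi i32 [ 0, %preheader ], [ %inc, %latch ]
+  // the first peeled copy uses 0 directly, while each later copy uses the
+  // %inc produced by the preceding copy (looked up through LVMap).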
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+ if (IterNumber == 0) {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
+ } else {
+ Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ VMap[&*I] = LVMap[LatchInst];
+ else
+ VMap[&*I] = LatchVal;
+ }
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
+
+ // Fix up the outgoing values - we need to add a value for the iteration
+ // we've just created. Note that this must happen *after* the incoming
+ // values are adjusted, since the value going out of the latch may also be
+ // a value coming into the header.
+ for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
+ }
+
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (const auto &KV : VMap)
+ LVMap[KV.first] = KV.second;
+}
+
+/// \brief Peel off the first \p PeelCount iterations of loop \p L.
+///
+/// Note that this does not peel them off as a single straight-line block.
+/// Rather, each iteration is peeled off separately, and needs to check the
+/// exit condition.
+/// For loops that dynamically execute \p PeelCount iterations or less
+/// this provides a benefit, since the peeled off iterations, which account
+/// for the bulk of dynamic execution, can be further simplified by scalar
+/// optimizations.
+bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA) {
+ if (!canPeel(L))
+ return false;
+
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Exit = L->getUniqueExitBlock();
+
+ Function *F = Header->getParent();
+
+ // Set up all the necessary basic blocks. It is convenient to split the
+ // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
+ // body, and a new preheader for the "real" loop.
+
+  // Peeling the first iteration transforms this:
+ //
+ // PreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // into
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // Each following iteration will split the current bottom anchor in two,
+ // and put the new copy of the loop body between these two blocks. That is,
+ // after peeling another iteration from the example above, we'll split
+ // InsertBot, and get:
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot.next:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+ BasicBlock *InsertBot =
+ SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+ BasicBlock *NewPreHeader =
+ SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+
+ InsertTop->setName(Header->getName() + ".peel.begin");
+ InsertBot->setName(Header->getName() + ".peel.next");
+ NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
+
+ ValueToValueMapTy LVMap;
+
+ // If we have branch weight information, we'll want to update it for the
+ // newly created branches.
+  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+
+ uint64_t TrueWeight, FalseWeight;
+ uint64_t ExitWeight = 0, CurHeaderWeight = 0;
+ if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
+ ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+ // The # of times the loop body executes is the sum of the exit block
+ // weight and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
+ }
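+  // Illustrative numbers: latch weights of 99 (backedge) and 1 (exit) give
+  // ExitWeight == 1 and CurHeaderWeight == 100, i.e. the body is expected to
+  // execute 100 times per entry into the loop.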
+
+ // For each peeled-off iteration, make a copy of the loop.
+ for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
+ SmallVector<BasicBlock *, 8> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Subtract the exit weight from the current header weight -- the exit
+ // weight is exactly the weight of the previous iteration's header.
+ // FIXME: due to the way the distribution is constructed, we need a
+ // guard here to make sure we don't end up with non-positive weights.
+ if (ExitWeight < CurHeaderWeight)
+ CurHeaderWeight -= ExitWeight;
+ else
+ CurHeaderWeight = 1;
+
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
+ NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+
+ // Remap to use values from the current iteration instead of the
+ // previous one.
+ remapInstructionsInBlocks(NewBlocks, VMap);
+
+ if (DT) {
+      // Latches of the cloned loops dominate the loop exit, so the immediate
+      // dominator of the exit becomes the latch of the first peeled iteration,
+      // just as the original PreHeader dominated the original loop body.
+ if (Iter == 0)
+ DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifndef NDEBUG
+ if (VerifyDomInfo)
+ DT->verifyDomTree();
+#endif
+ }
+
+ updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+ PeelCount, ExitWeight);
+
+ InsertTop = InsertBot;
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ InsertBot->setName(Header->getName() + ".peel.next");
+
+ F->getBasicBlockList().splice(InsertTop->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
+ }
+
+ // Now adjust the phi nodes in the loop header to get their initial values
+ // from the last peeled-off iteration instead of the preheader.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ Value *NewVal = PHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
+ if (LatchInst && L->contains(LatchInst))
+ NewVal = LVMap[LatchInst];
+
+ PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+ }
+
+ // Adjust the branch weights on the loop exit.
+ if (ExitWeight) {
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+
+  // If the loop is nested, we changed the parent loop; update SE.
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ SE->forgetLoop(ParentLoop);
+
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
+ } else {
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
+ }
+
+ NumPeeled++;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 000000000000..d43ce7abb7cd
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,873 @@
+//===-- LoopUnrollRuntime.cpp - Runtime loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations before or after the
+// unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+ "epilog is generated"));
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
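+/// Roughly (illustrative), the control flow being stitched together is:
+///   PreHeader -> prolog loop, or straight to PrologExit when no 'extra'
+///   iterations are needed; PrologExit -> OriginalLoopLatchExit when the
+///   prolog already executed every iteration, otherwise PrologExit ->
+///   NewPreHeader -> unrolled loop.
+///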
+static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
+ BasicBlock *PrologExit,
+ BasicBlock *OriginalLoopLatchExit,
+ BasicBlock *PreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI node value is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (BasicBlock *Succ : successors(Latch)) {
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PrologExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+ PreHeader);
+ } else {
+ NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+ }
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = VMap.lookup(I);
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, PrologLatch);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologExit);
+ }
+ }
+ }
+
+  // Make sure that the created prolog loop is in simplified form.
+ SmallVector<BasicBlock *, 4> PrologExitPreds;
+ Loop *PrologLoop = LI->getLoopFor(PrologLatch);
+ if (PrologLoop) {
+ for (BasicBlock *PredBB : predecessors(PrologExit))
+ if (PrologLoop->contains(PredBB))
+ PrologExitPreds.push_back(PredBB);
+
+ SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
+ PreserveLCSSA);
+ }
+
+ // Create a branch around the original loop, which is taken if there are no
+ // iterations remaining to be executed after running the prologue.
+ Instruction *InsertPt = PrologExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+
+ assert(Count != 0 && "nonsensical Count!");
+
+ // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
+ // This means %xtraiter is (BECount + 1) and all of the iterations of this
+ // loop were executed by the prologue. Note that if BECount <u (Count - 1)
+ // then (BECount + 1) cannot unsigned-overflow.
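+  // Illustrative numbers: with Count == 4 and BECount == 2 (a trip count of
+  // 3), the prolog ran all three iterations, 2 <u 3 holds, and we branch
+  // around the unrolled loop.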
+ Value *BrLoopExit =
+ B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
+ SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolled loop)
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
+ InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
+}
+
+/// Connect the unrolling epilog code to the original loop.
+/// The unrolling epilog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit to combine
+/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI operands in the epilog loop by the new PHI nodes
+/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+///
+static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
+ BasicBlock *Exit, BasicBlock *PreHeader,
+ BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Loop must have a latch");
+ BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
+
+ // Loop structure should be the following:
+ //
+ // PreHeader
+ // NewPreHeader
+ // Header
+ // ...
+ // Latch
+ // NewExit (PN)
+ // EpilogPreHeader
+ // EpilogHeader
+ // ...
+ // EpilogLatch
+ // Exit (EpilogPN)
+
+ // Update PHI nodes at NewExit and Exit.
+ for (Instruction &BBI : *NewExit) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+    // PN should be used in another PHI located in the Exit block, as
+    // Exit was split by SplitBlockPredecessors into Exit and NewExit.
+    // Basically it should look like:
+ // NewExit:
+ // PN = PHI [I, Latch]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, EpilogPreHeader]
+ //
+    // The incoming block is EpilogPreHeader instead of NewExit because
+    // NewExit was split one more time to produce EpilogPreHeader.
+ assert(PN->hasOneUse() && "The phi should have 1 use");
+    PHINode *EpilogPN = cast<PHINode>(PN->use_begin()->getUser());
+ assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
+
+    // Add an undef incoming value for PreHeader (the branch around the loop).
+ PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I && L->contains(I))
+      // If the value comes from an instruction in the loop, use the VMap
+      // value.
+ V = VMap.lookup(I);
+    // For an instruction defined outside the loop, a constant, or an undef
+    // value, use the value itself.
+ EpilogPN->addIncoming(V, EpilogLatch);
+
+ assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
+ "EpilogPN should have EpilogPreHeader incoming block");
+ // Change EpilogPreHeader incoming block to NewExit.
+ EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
+ NewExit);
+ // Now PHIs should look like:
+ // NewExit:
+ // PN = PHI [I, Latch], [undef, PreHeader]
+ // ...
+ // Exit:
+ // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
+ }
+
+ // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
+ // Update corresponding PHI nodes in epilog loop.
+ for (BasicBlock *Succ : successors(Latch)) {
+ // Skip this as we already updated phis in exit blocks.
+ if (!L->contains(Succ))
+ continue;
+ for (Instruction &BBI : *Succ) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ // Exit when we passed all PHI nodes.
+ if (!PN)
+ break;
+ // Add new PHI nodes to the loop exit block and update epilog
+ // PHIs with the new PHI values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ NewExit->getFirstNonPHI());
+ // Adding a value to the new PHI node from the unrolling loop preheader.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Adding a value to the new PHI node from the unrolling loop latch.
+ NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+
+ // Update the existing PHI node operand with the value from the new PHI
+ // node. Corresponding instruction in epilog loop should be PHI.
+ PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+ VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ }
+ }
+
+ Instruction *InsertPt = NewExit->getTerminator();
+ IRBuilder<> B(InsertPt);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ assert(Exit && "Loop must have a single exit block only");
+ // Split the epilogue exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ PreserveLCSSA);
+ // Add the branch to the exit block (around the unrolling loop)
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
+ InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(Exit, NewExit);
+
+ // Split the main loop exit to maintain canonicalization guarantees.
+ SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ PreserveLCSSA);
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// If CreateRemainderLoop is false, the loop structure will not be cloned;
+/// otherwise a new loop is created that includes all cloned blocks, and its
+/// induction variable counts NewIter down to 0.
+/// The cloned blocks should be inserted between InsertTop and InsertBot.
+/// If the loop structure is cloned, InsertTop should be the new preheader and
+/// InsertBot the new loop exit.
+/// Returns the new cloned loop when CreateRemainderLoop is true, otherwise
+/// nullptr.
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
+ std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
+ StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *ParentLoop = L->getParentLoop();
+ NewLoopsMap NewLoops;
+ NewLoops[ParentLoop] = ParentLoop;
+ if (!CreateRemainderLoop)
+ NewLoops[L] = ParentLoop;
+
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
+ NewBlocks.push_back(NewBB);
+
+ // If we're unrolling the outermost loop, there's no remainder loop,
+ // and this block isn't in a nested loop, then the new block is not
+    // in any loop. Otherwise, add it to LoopInfo.
+ if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
+ addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block.
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+ }
+
+ if (DT) {
+ if (Header == *BB) {
+ // The header is dominated by the preheader.
+ DT->addNewBlock(NewBB, InsertTop);
+ } else {
+ // Copy information from original loop to unrolled loop.
+ BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+ }
+ }
+
+ if (Latch == *BB) {
+      // For the last block, if CreateRemainderLoop is false, create a direct
+      // jump to InsertBot. Otherwise, create a loop back to the cloned header.
+ VMap.erase((*BB)->getTerminator());
+ BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
+ BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
+ IRBuilder<> Builder(LatchBR);
+ if (!CreateRemainderLoop) {
+ Builder.CreateBr(InsertBot);
+ } else {
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
+ FirstLoopBB->getFirstNonPHI());
+ Value *IdxSub =
+ Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".sub");
+ Value *IdxCmp =
+ Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ NewIdx->addIncoming(NewIter, InsertTop);
+ NewIdx->addIncoming(IdxSub, NewBB);
+ }
+ LatchBR->eraseFromParent();
+ }
+ }
+
+ // Change the incoming values to the ones defined in the preheader or
+ // cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+ if (!CreateRemainderLoop) {
+ if (UseEpilogRemainder) {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ NewPHI->removeIncomingValue(Latch, false);
+ } else {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
+ } else {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
+ }
+ }
+ if (CreateRemainderLoop) {
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+ MDNode *LoopID = NewLoop->getLoopID();
+ if (LoopID) {
+ // First remove any existing loop unrolling metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata)
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ LLVMContext &Context = NewLoop->getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ NewLoop->setLoopID(NewLoopID);
+ return NewLoop;
+ }
+  return nullptr;
+}
+
+/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
+/// is populated with all the loop exit blocks other than the LatchExit block.
+static bool
+canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
+ BasicBlock *LatchExit, bool PreserveLCSSA,
+ bool UseEpilogRemainder) {
+
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!UnrollRuntimeMultiExit)
+ return false;
+  // Even if runtime multi-exit is enabled, we currently have some correctness
+  // constraints when unrolling a multi-exit loop.
+  // We rely on LCSSA form being preserved when the exit blocks are transformed.
+ if (!PreserveLCSSA)
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
+ // UnrollRuntimeMultiExit is true. This will need updating the logic in
+ // connectEpilog/connectProlog.
+ if (!LatchExit->getSinglePredecessor()) {
+ DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
+ "predecessor.\n");
+ return false;
+ }
+  // FIXME: We bail out of multi-exit unrolling when an epilog loop is
+  // generated and L is an inner loop. This is because in the presence of
+  // multiple exits, the outer loop is incorrect: we do not add the
+  // EpilogPreheader and exit to the outer loop. This is automatically handled
+  // in the prolog case, so we do not have that bug in prolog generation.
+ if (UseEpilogRemainder && L->getParentLoop())
+ return false;
+
+ // All constraints have been satisfied.
+ return true;
+}
+
+/// Insert code in the prolog/epilog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// When the unroll factor is a power of two, the number of loop bodies
+/// executed after unrolling is 2, 4, 8, etc.; a non-power-of-two factor
+/// is handled by the overflow-safe URem path below. Note - LLVM converts
+/// the if-then-sequence to a switch instruction in SimplifyCFG.cpp. Then,
+/// the backend decides how code for the switch instruction is generated.
+///
+/// ***Prolog case***
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// else jump Prol:
+/// Prol: LoopBody;
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
+/// if (tripcount < loopfactor) jump End:
+/// Loop:
+/// ...
+/// End:
+///
+/// ***Epilog case***
+/// extraiters = tripcount % loopfactor
+/// if (tripcount < loopfactor) jump LoopExit:
+/// unroll_iters = tripcount - extraiters
+/// Loop: LoopBody; (executes unroll_iter times);
+/// unroll_iter -= 1
+/// if (unroll_iter != 0) jump Loop:
+/// LoopExit:
+/// if (extraiters == 0) jump EpilExit:
+/// Epil: LoopBody; (executes extraiters times)
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
+/// EpilExit:
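+///
+/// As a worked example (illustrative numbers only): with tripcount = 10 and
+/// loopfactor = 4, extraiters = 10 % 4 = 2; in the epilog case the unrolled
+/// loop executes 10 - 2 = 8 iterations and the epilog runs the remaining 2.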
+bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount,
+ bool UseEpilogRemainder,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, bool PreserveLCSSA) {
+ DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
+ DEBUG(L->dump());
+
+ // Make sure the loop is in canonical form.
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(dbgs() << "Not in simplify form!\n");
+ return false;
+ }
+
+ // Guaranteed by LoopSimplifyForm.
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+ BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be an exit block out of the loop. This needs
+ // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+ assert(!L->contains(LatchExit) &&
+ "one of the loop latch successors should be the exit block!");
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(
+ L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder);
+  // Support only a single exit and exiting block unless multi-exit loop
+  // unrolling is enabled.
+ if (!isMultiExitUnrollingEnabled &&
+ (!L->getExitingBlock() || OtherExits.size())) {
+ DEBUG(
+ dbgs()
+ << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
+ "enabled!\n");
+ return false;
+ }
+ // Use Scalar Evolution to compute the trip count. This allows more loops to
+ // be unrolled than relying on induction var simplification.
+ if (!SE)
+ return false;
+
+ // Only unroll loops with a computable trip count, and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item).
+  // We calculate the backedge count by using getExitCount on the Latch block,
+  // which is proven to be the only exiting block in this loop. This is the
+  // same as calculating getBackedgeTakenCount on the loop (which computes SCEV
+  // for all exiting blocks).
+ const SCEV *BECountSC = SE->getExitCount(L, Latch);
+ if (isa<SCEVCouldNotCompute>(BECountSC) ||
+ !BECountSC->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "Could not compute exit block SCEV\n");
+ return false;
+ }
+
+ unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
+
+ // Add 1 since the backedge count doesn't include the first loop iteration.
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC)) {
+ DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
+ return false;
+ }
+
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
+ if (!AllowExpensiveTripCount &&
+ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
+ DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
+ return false;
+ }
+
+ // This constraint lets us deal with an overflowing trip count easily; see the
+ // comment on ModVal below.
+ if (Log2_32(Count) > BEWidth) {
+ DEBUG(dbgs()
+ << "Count failed constraint on overflow trip count calculation.\n");
+ return false;
+ }
+
+ // Loop structure is the following:
+ //
+ // PreHeader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
+
+ BasicBlock *NewPreHeader;
+ BasicBlock *NewExit = nullptr;
+ BasicBlock *PrologExit = nullptr;
+ BasicBlock *EpilogPreHeader = nullptr;
+ BasicBlock *PrologPreHeader = nullptr;
+
+ if (UseEpilogRemainder) {
+    // Epilog remainder case.
+ // Split PreHeader to insert a branch around loop for unrolling.
+ NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ // Split LatchExit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
+ DT, LI, PreserveLCSSA);
+ // Split NewExit to insert epilog remainder loop.
+ EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
+ EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+ } else {
+    // Prolog remainder case.
+ // Split the original preheader twice to insert prolog remainder loop
+ PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
+ PrologPreHeader->setName(Header->getName() + ".prol.preheader");
+ PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
+ DT, LI);
+ PrologExit->setName(Header->getName() + ".prol.loopexit");
+ // Split PrologExit to get NewPreHeader.
+ NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
+ NewPreHeader->setName(PreHeader->getName() + ".new");
+ }
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // *NewPreHeader *PrologPreHeader
+ // Header *PrologExit
+ // ... *NewPreHeader
+ // Latch Header
+ // *NewExit ...
+ // *EpilogPreHeader Latch
+ // LatchExit LatchExit
+
+  // Calculate the condition for the branch around the unrolled loop in the
+  // epilog case, or around the prolog remainder loop in the prolog case.
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % loop unroll factor
+ PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
+ PreHeaderBR);
+ IRBuilder<> B(PreHeaderBR);
+ Value *ModVal;
+ // Calculate ModVal = (BECount + 1) % Count.
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count)) {
+    // When Count is a power of 2 we don't need BECount for the epilog case;
+    // however, we'll need it for a branch around the unrolling loop in the
+    // prolog case.
+    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+    // If ModVal is zero, then either:
+    //  1. There are no iterations to be run in the prolog/epilog loop.
+    // OR
+    //  2. The addition computing TripCount overflowed.
+    //
+    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+    // the number of iterations that remain to be run in the original loop is
+    // a multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth
+    // (we explicitly check this above).
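+    //
+    // For example (illustrative only), with Count = 4 and TripCount = 10:
+    // ModVal = 10 & 3 = 2, so the remainder loop runs 2 extra iterations.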
+ } else {
+    // Since (BECount + 1) can potentially overflow (unsigned), we instead
+    // compute (BECount % Count) + 1, which is overflow safe since
+    // BECount % Count < Count.
+ Value *ModValTmp = B.CreateURem(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count));
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+    // At this point (BECount % Count) + 1 could be equal to Count.
+    // To handle this case we need to take the result mod Count one more time.
+ ModVal = B.CreateURem(ModValAdd,
+ ConstantInt::get(BECount->getType(), Count),
+ "xtraiter");
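+    // For example (illustrative only), with BECount = 5 and Count = 3:
+    // (5 % 3) + 1 == 3 and 3 % 3 == 0, matching TripCount % Count == 6 % 3.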
+ }
+ Value *BranchVal =
+ UseEpilogRemainder ? B.CreateICmpULT(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count - 1)) :
+ B.CreateIsNotNull(ModVal, "lcmp.mod");
+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
+ // Branch to either remainder (extra iterations) loop or unrolling loop.
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
+ PreHeaderBR->eraseFromParent();
+ if (DT) {
+ if (UseEpilogRemainder)
+ DT->changeImmediateDominator(NewExit, PreHeader);
+ else
+ DT->changeImmediateDominator(PrologExit, PreHeader);
+ }
+ Function *F = Header->getParent();
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog/epilog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ std::vector<BasicBlock *> NewBlocks;
+ ValueToValueMapTy VMap;
+
+  // For unroll factor 2 the remainder loop will execute exactly 1 iteration,
+  // so do not create a loop for a single iteration.
+ bool CreateRemainderLoop = (Count != 2);
+
+ // Clone all the basic blocks in the loop. If Count is 2, we don't clone
+ // the loop, otherwise we create a cloned loop to execute the extra
+ // iterations. This function adds the appropriate CFG connections.
+ BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
+ BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
+ Loop *remainderLoop = CloneLoopBlocks(
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+
+ // Insert the cloned blocks into the function.
+ F->getBasicBlockList().splice(InsertBot->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(),
+ F->end());
+
+ // Now the loop blocks are cloned and the other exiting blocks from the
+ // remainder are connected to the original Loop's exit blocks. The remaining
+ // work is to update the phi nodes in the original loop, and take in the
+ // values from the cloned region. Also update the dominator info for
+ // OtherExits and their immediate successors, since we have new edges into
+ // OtherExits.
+ SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+ for (auto *BB : OtherExits) {
+ for (auto &II : *BB) {
+
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ if (!isa<PHINode>(II))
+ break;
+ PHINode *Phi = cast<PHINode>(&II);
+ unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Add the incoming values from the remainder code to the end of the phi
+ // node.
+      for (unsigned i = 0; i < oldNumOperands; i++) {
+ Value *newVal = VMap[Phi->getIncomingValue(i)];
+ // newVal can be a constant or derived from values outside the loop, and
+ // hence need not have a VMap value.
+ if (!newVal)
+ newVal = Phi->getIncomingValue(i);
+ Phi->addIncoming(newVal,
+ cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+ }
+ }
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ for (BasicBlock *SuccBB : successors(BB)) {
+ assert(!(any_of(OtherExits,
+ [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
+ SuccBB == LatchExit) &&
+ "Breaks the definition of dedicated exits!");
+ }
+#endif
+    // Update the dominator info because the immediate dominator is no longer
+    // the header of the original loop. BB has edges both from L and from the
+    // remainder code. Since the preheader determines whether L runs or control
+    // jumps directly to the remainder code, we set the immediate dominator of
+    // BB to the preheader.
+ if (DT) {
+ DT->changeImmediateDominator(BB, PreHeader);
+ // Also update the IDom for immediate successors of BB. If the current
+ // IDom is the header, update the IDom to be the preheader because that is
+ // the nearest common dominator of all predecessors of SuccBB. We need to
+ // check for IDom being the header because successors of exit blocks can
+ // have edges from outside the loop, and we should not incorrectly update
+ // the IDom in that case.
+ for (BasicBlock *SuccBB: successors(BB))
+ if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
+ if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
+ assert(!SuccBB->getSinglePredecessor() &&
+ "BB should be the IDom then!");
+ DT->changeImmediateDominator(SuccBB, PreHeader);
+ }
+ }
+ }
+ }
+
+ // Loop structure should be the following:
+ // Epilog Prolog
+ //
+ // PreHeader PreHeader
+ // NewPreHeader PrologPreHeader
+ // Header PrologHeader
+ // ... ...
+ // Latch PrologLatch
+ // NewExit PrologExit
+ // EpilogPreHeader NewPreHeader
+ // EpilogHeader Header
+ // ... ...
+ // EpilogLatch Latch
+ // LatchExit LatchExit
+
+ // Rewrite the cloned instruction operands to use the values created when the
+ // clone is created.
+ for (BasicBlock *BB : NewBlocks) {
+ for (Instruction &I : *BB) {
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
+
+ if (UseEpilogRemainder) {
+ // Connect the epilog code to the original loop and update the
+ // PHI functions.
+ ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
+ EpilogPreHeader, NewPreHeader, VMap, DT, LI,
+ PreserveLCSSA);
+
+    // Update the iteration counter in the unrolled loop. The trip count of
+    // the unrolled loop, unroll_iter = TripCount - ModVal, is a multiple of
+    // Count by construction.
+ IRBuilder<> B2(NewPreHeader->getTerminator());
+ Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ B2.SetInsertPoint(LatchBR);
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
+ Header->getFirstNonPHI());
+ Value *IdxSub =
+ B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".nsub");
+ Value *IdxCmp;
+ if (LatchBR->getSuccessor(0) == Header)
+ IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
+ else
+ IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(TestVal, NewPreHeader);
+ NewIdx->addIncoming(IdxSub, Latch);
+ LatchBR->setCondition(IdxCmp);
+ } else {
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA);
+ }
+
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ // Canonicalize to LoopSimplifyForm both original and remainder loops. We
+ // cannot rely on the LoopUnrollPass to do this because it only does
+ // canonicalization for parent/subloops and not the sibling loops.
+ if (OtherExits.size() > 0) {
+ // Generate dedicated exit blocks for the original loop, to preserve
+ // LoopSimplifyForm.
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ // Generate dedicated exit blocks for the remainder loop if one exists, to
+ // preserve LoopSimplifyForm.
+ if (remainderLoop)
+ formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ }
+
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
new file mode 100644
index 000000000000..3c522786641a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -0,0 +1,1396 @@
+//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common loop utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "loop-utils"
+
+bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+Instruction *
+RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
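+  // For example (illustrative only), matching "and i32 %next, 255" against a
+  // 32-bit phi narrows RT to i8, since 255 == 2^8 - 1.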
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
+
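+// Walk the reduction expression from Exit back toward Start, checking that
+// every value feeding the reduction from outside the visited set is a
+// single-use sign- or zero-extend (all of the same kind) whose source type is
+// no wider than the reduction type RT.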
+bool RecurrenceDescriptor::getSourceExtensionKind(
+ Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ bool FoundOneOperand = false;
+ unsigned DstSize = RT->getPrimitiveSizeInBits();
+ Worklist.push_back(Exit);
+
+ // Traverse the instructions in the reduction expression, beginning with the
+ // exit value.
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (Use &U : I->operands()) {
+
+ // Terminate the traversal if the operand is not an instruction, or we
+ // reach the starting value.
+ Instruction *J = dyn_cast<Instruction>(U.get());
+ if (!J || J == Start)
+ continue;
+
+ // Otherwise, investigate the operation if it is also in the expression.
+ if (Visited.count(J)) {
+ Worklist.push_back(J);
+ continue;
+ }
+
+ // If the operand is not in Visited, it is not a reduction operation, but
+ // it does feed into one. Make sure it is either a single-use sign- or
+ // zero-extend instruction.
+ CastInst *Cast = dyn_cast<CastInst>(J);
+ bool IsSExtInst = isa<SExtInst>(J);
+ if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
+ return false;
+
+ // Ensure the source type of the extend is no larger than the reduction
+ // type. It is not necessary for the types to be identical.
+ unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+ if (SrcSize > DstSize)
+ return false;
+
+ // Furthermore, ensure that all such extends are of the same kind.
+ if (FoundOneOperand) {
+ if (IsSigned != IsSExtInst)
+ return false;
+ } else {
+ FoundOneOperand = true;
+ IsSigned = IsSExtInst;
+ }
+
+ // Lastly, if the source type of the extend matches the reduction type,
+ // add the extend to CI so that we can avoid accounting for it in the
+ // cost model.
+ if (SrcSize == DstSize)
+ CI.insert(Cast);
+ }
+ }
+ return true;
+}
+
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+  // ExitInstruction is the single value which is used outside the loop.
+  // We only allow a single reduction value to be used outside the loop,
+  // including users of the reduction variables that form a cycle ending
+  // in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+  // To recognize min/max patterns formed by an icmp/select sequence, we count
+  // the instructions we see from the recognized min/max pattern, to make sure
+  // we see exactly the two expected instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ InstDesc ReduxDesc(false, nullptr);
+
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+  //      - One use of the reduction value (safe).
+  //      - Multiple uses of the reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
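+  //
+  // For example (illustrative IR only), a simple integer add reduction:
+  //   loop:
+  //     %sum = phi i32 [ 0, %preheader ], [ %sum.next, %loop ]
+  //     %sum.next = add i32 %sum, %x
+  //   exit:
+  //     %use = ... %sum.next ...
+  // Here %sum.next is the single value used outside the loop, and the cycle
+  // %sum -> %sum.next -> %sum ends at the original phi.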
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+    // Reductions over non-commutative instructions such as Div and Sub are
+    // only possible if the LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi && Cur != Start;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+        // Exit if we find multiple values used outside or if the header phi
+        // node is being used. In this case the user uses the value of the
+        // previous iteration, and we would lose "VF-1" iterations of the
+        // reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+        // The instruction used by an outside user must be the last instruction
+        // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (!is_contained(Phi->operands(), Cur))
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ InstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+  // If this check fails, we have seen one but not the other instruction of
+  // the min/max pattern, or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+  // If we think Phi may have been type-promoted, we also need to ensure that
+  // all source operands of the reduction are either SExtInsts or ZExtInsts.
+  // If so, we will be able to evaluate the reduction in the narrower bit width.
+ if (Start != Phi)
+ if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+ IsSigned, VisitedInsts, CastInsts))
+ return false;
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+  // The ExitInstruction (the instruction which is allowed to have out-of-loop
+  // users) is saved as part of the RecurrenceDescriptor.
+
+ // Save the description of this reduction variable.
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
+ RedDes = RD;
+
+ return true;
+}
+
+/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+/// pattern corresponding to a min(X, Y) or max(X, Y).
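+///
+/// For example (illustrative IR only):
+///   %cmp = icmp slt i32 %x, %y
+///   %sel = select i1 %cmp, i32 %x, i32 %y
+/// is recognized as a signed minimum (MRK_SIntMin).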
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
+
+  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+         "Expected a cmp or select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return InstDesc(false, I);
+ return InstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return InstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return InstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+
+ return InstDesc(false, I);
+}
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->hasUnsafeAlgebra())
+ UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
+ switch (I->getOpcode()) {
+ default:
+ return InstDesc(false, I);
+ case Instruction::PHI:
+ return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return InstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return InstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return InstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return InstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return InstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool RecurrenceDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
+ }
+
+ return false;
+}
+
+bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes) {
+
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ bool HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
+ RedDes)) {
+ DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
+
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+  // Get the previous value. The previous value comes from the latch edge,
+  // while the initial value comes from the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
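+  // For example (illustrative only): if the phi's single user is a cast such
+  // as "%e = sext i16 %phi to i32" in the header, and Previous dominates all
+  // users of %e, we record %e in SinkAfter to be sunk after Previous rather
+  // than rejecting the recurrence.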
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!DT->dominates(Previous, I))
+ return false;
+ }
+
+ return true;
+}
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
+ Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+    // Adding, Xor'ing, or Or'ing zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown recurrence kind");
+ }
+}
+
+/// This function translates the recurrence kind to an LLVM binary operator.
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown recurrence operation");
+ }
+}
+
+Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
+ MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ // We only match FP sequences with unsafe algebra, so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ Builder.setFastMathFlags(FMF);
+
+ Value *Cmp;
+ if (RK == MRK_FloatMin || RK == MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ const SCEV *Step, BinaryOperator *BOp)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ assert(IK != IK_NoInduction && "Not an induction");
+
+ // Start value type should match the induction kind and the value
+ // itself should not be null.
+ assert(StartValue && "StartValue is null");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+
+ // Check the Step Value. It should be non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction || (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
+ return 0;
+}
+
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
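+// Conceptually, transform() computes StartValue + Index * Step: a sub/add
+// fast path for constant integer steps of -1/+1, a GEP for pointer
+// inductions, and the recorded FAdd/FSub for FP inductions.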
+Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
+ ScalarEvolution *SE,
+ const DataLayout& DL) const {
+
+ SCEVExpander Exp(*SE, DL, "induction");
+ assert(Index->getType() == Step->getType() &&
+ "Index type does not match StepValue type");
+ switch (IK) {
+ case IK_IntInduction: {
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+
+ // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
+ // and calculate (Start + Index * Step) for all cases, without
+ // special handling for "isOne" and "isMinusOne".
+    // But in practice the resulting code is worse: we end up mixing SCEV
+    // expressions with ADD/SUB operations, producing redundant intermediate
+    // values computed in different ways that InstCombine is unable to
+    // fully reduce.
+
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (getConstIntStepValue() &&
+ getConstIntStepValue()->isOne())
+ return B.CreateAdd(StartValue, Index);
+ const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
+ SE->getMulExpr(Step, SE->getSCEV(Index)));
+ return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+ }
+ case IK_PtrInduction: {
+ assert(isa<SCEVConstant>(Step) &&
+ "Expected constant step for pointer induction");
+ const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
+ Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
+ return B.CreateGEP(nullptr, StartValue, Index);
+ }
+ case IK_FpInduction: {
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+
+ Value *MulExp = B.CreateFMul(StepValue, Index);
+ if (isa<Instruction>(MulExp))
+      // We have to check because MulExp may be folded to a constant.
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags);
+
+    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue,
+                               MulExp, "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+
+ return BOp;
+ }
+ case IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+  // The addend should be loop invariant.
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+  // The FP step has an unknown SCEV.
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D,
+ bool Assume) {
+ Type *PhiTy = Phi->getType();
+
+  // We handle integer and pointer induction variables. FP induction is also
+  // handled now, but without trying to form a recurrence expression from the
+  // PHI node in place.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
+ !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
+ return false;
+
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D,
+ const SCEV *Expr) {
+ Type *PhiTy = Phi->getType();
+  // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ if (AR->getLoop() != TheLoop) {
+    // FIXME: We should treat this as a uniform. Unfortunately, we
+    // don't currently know how to handle uniform PHIs.
+ DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
+ return false;
+ }
+
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
+ return false;
+
+ if (PhiTy->isIntegerTy()) {
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ // Pointer induction should be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ if (!Size)
+ return false;
+
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
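+  // For example (illustrative only): an i32* induction advancing by 8 bytes
+  // per iteration has Size = 4 and CVSize = 8, giving an element stride of 2.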
+ auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
+ true /* signed */);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ return true;
+}
+
+bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+
+  // We re-use a vector for the in-loop predecessors.
+ SmallVector<BasicBlock *, 4> InLoopPredecessors;
+
+ auto RewriteExit = [&](BasicBlock *BB) {
+ assert(InLoopPredecessors.empty() &&
+ "Must start with an empty predecessors list!");
+ auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
+
+ // See if there are any non-loop predecessors of this exit block and
+ // keep track of the in-loop predecessors.
+ bool IsDedicatedExit = true;
+ for (auto *PredBB : predecessors(BB))
+ if (L->contains(PredBB)) {
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from an indirectbr.
+ return false;
+
+ InLoopPredecessors.push_back(PredBB);
+ } else {
+ IsDedicatedExit = false;
+ }
+
+ assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
+
+ // Nothing to do if this is already a dedicated exit.
+ if (IsDedicatedExit)
+ return false;
+
+ auto *NewExitBB = SplitBlockPredecessors(
+ BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+
+ if (!NewExitBB)
+ DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
+ << *L << "\n");
+ else
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
+ return true;
+ };
+
+ // Walk the exit blocks directly rather than building up a data structure for
+ // them, but only visit each one once.
+ SmallPtrSet<BasicBlock *, 4> Visited;
+ for (auto *BB : L->blocks())
+ for (auto *SuccBB : successors(BB)) {
+ // We're looking for exit blocks so skip in-loop successors.
+ if (L->contains(SuccBB))
+ continue;
+
+ // Visit each exit block exactly once.
+ if (!Visited.insert(SuccBB).second)
+ continue;
+
+ Changed |= RewriteExit(SuccBB);
+ }
+
+ return Changed;
+}
+
+/// \brief Returns the instructions that use values defined in the loop.
+SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (any_of(Users, [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
+
+void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
+ // By definition, all loop passes need the LoopInfo analysis and the
+ // Dominator tree it depends on. Because they all participate in the loop
+ // pass manager, they must also preserve these.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+
+ // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
+ // here because users shouldn't directly get them from this header.
+ extern char &LoopSimplifyID;
+ extern char &LCSSAID;
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ // This is used in the LPPassManager to perform LCSSA verification on passes
+ // which preserve LCSSA form.
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
+
+ // Loop passes are designed to run inside of a loop pass manager which means
+ // that any function analyses they require must be required by the first loop
+ // pass in the manager (so that it is computed before the loop pass manager
+ // runs) and preserved by all loop passes in the manager. To make this
+ // reasonably robust, the set needed for most loop passes is maintained here.
+ // If your loop pass requires an analysis not listed here, you will need to
+ // carefully audit the loop pass manager nesting structure that results.
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+}
+
+/// Manually defined generic "LoopPass" dependency initialization. This is used
+/// to initialize the exact set of passes from above in \c
+/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
+/// with:
+///
+/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
+///
+/// As-if "LoopPass" were a pass.
+void llvm::initializeLoopPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+ INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+}
+
+/// \brief Find string metadata for loop
+///
+/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
+/// operand or null otherwise. If the string metadata is not found return
+/// Optional's not-a-value.
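+///
+/// For example, given loop metadata of the form (a sketch):
+///
+///   !0 = distinct !{!0, !1}
+///   !1 = !{!"llvm.loop.unroll.count", i32 4}
+///
+/// a query for "llvm.loop.unroll.count" returns a pointer to the 'i32 4'
+/// operand.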
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+ StringRef Name) {
+ MDNode *LoopID = TheLoop->getLoopID();
+ // Return None if there is no loop metadata.
+ if (!LoopID)
+ return None;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over LoopID operands and look for MDString Metadata
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+ // If the name matches, return the value operand, or null if there is none.
+ if (Name.equals(S->getString()))
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+ }
+ return None;
+}
+
+/// Returns true if the instruction in a loop is guaranteed to execute at least
+/// once.
+bool llvm::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo) {
+ // We have to check to make sure that the instruction dominates all
+ // of the exit blocks. If it doesn't, then there is a path out of the loop
+ // which does not execute this instruction, so we can't hoist it.
+
+ // If the instruction is in the header block for the loop (which is very
+ // common), it is always guaranteed to dominate the exit blocks. Since this
+ // is a common case, and can save some work, check it now.
+ if (Inst.getParent() == CurLoop->getHeader())
+ // If there's a throw in the header block, we can't guarantee we'll reach
+ // Inst.
+ return !SafetyInfo->HeaderMayThrow;
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (SafetyInfo->MayThrow)
+ return false;
+
+ // Get the exit blocks for the current loop.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ // Verify that the block dominates each of the exit blocks of the loop.
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
+ return false;
+
+ // As a degenerate case, if the loop is statically infinite then we haven't
+ // proven anything since there are no exit blocks.
+ if (ExitBlocks.empty())
+ return false;
+
+ // FIXME: In general, we have to prove that the loop isn't an infinite loop.
+ // See http://llvm.org/PR24078. (The "ExitBlocks.empty()" check above is
+ // just a special case of this.)
+ return true;
+}
+
+Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
+ // Only support loops with a unique exiting block, and a latch.
+ if (!L->getExitingBlock())
+ return None;
+
+ // Get the branch weights for the loop's backedge.
+ BranchInst *LatchBR =
+ dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+ return None;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ // To estimate the number of times the loop body was executed, we want to
+ // know the number of times the backedge was taken, vs. the number of times
+ // we exited the loop.
+ uint64_t TrueVal, FalseVal;
+ if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
+ return None;
+
+ if (!TrueVal || !FalseVal)
+ return 0;
+
+ // Divide the count of the backedge by the count of the edge exiting the loop,
+ // rounding to nearest.
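+ // For example, branch weights of 1000 (backedge taken) and 10 (loop exited)
+ // give an estimated trip count of (1000 + 5) / 10 == 100.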
+ if (LatchBR->getSuccessor(0) == L->getHeader())
+ return (TrueVal + (FalseVal / 2)) / FalseVal;
+ else
+ return (FalseVal + (TrueVal / 2)) / TrueVal;
+}
+
+/// \brief Adds a 'fast' flag to floating point operations.
+static Value *addFastMathFlag(Value *V) {
+ if (isa<FPMathOperator>(V)) {
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ cast<Instruction>(V)->setFastMathFlags(Flags);
+ }
+ return V;
+}
+
+// Helper to generate a log2 shuffle reduction.
+Value *
+llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+ ArrayRef<Value *> RedOps) {
+ unsigned VF = Src->getType()->getVectorNumElements();
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
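+ //
+ // For example, for VF == 4 this emits (a sketch):
+ //
+ //   %rdx.shuf = shufflevector %src, undef, <2, 3, undef, undef>
+ //   %bin.rdx = op %src, %rdx.shuf
+ //   %rdx.shuf.2 = shufflevector %bin.rdx, undef, <1, undef, undef, undef>
+ //   %bin.rdx.2 = op %bin.rdx, %rdx.shuf.2
+ //
+ // and the result is extracted from element 0 of %bin.rdx.2.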
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = Src;
+ SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i / 2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i / 2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask), "rdx.shuf");
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
+ // Floating point operations had to be 'fast' to enable the reduction.
+ TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
+ TmpVec, Shuf, "bin.rdx"));
+ } else {
+ assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
+ "Invalid min/max");
+ TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
+ Shuf);
+ }
+ if (!RedOps.empty())
+ propagateIRFlags(TmpVec, RedOps);
+ }
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+}
+
+/// Create a simple vector reduction specified by an opcode and some
+/// flags (if generating min/max reductions).
+Value *llvm::createSimpleTargetReduction(
+ IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
+ Value *Src, TargetTransformInfo::ReductionFlags Flags,
+ ArrayRef<Value *> RedOps) {
+ assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
+
+ Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
+ std::function<Value*()> BuildFunc;
+ using RD = RecurrenceDescriptor;
+ RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
+ // TODO: Support creating ordered reductions.
+ FastMathFlags FMFUnsafe;
+ FMFUnsafe.setUnsafeAlgebra();
+
+ switch (Opcode) {
+ case Instruction::Add:
+ BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
+ break;
+ case Instruction::Mul:
+ BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
+ break;
+ case Instruction::And:
+ BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
+ break;
+ case Instruction::Or:
+ BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
+ break;
+ case Instruction::Xor:
+ BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
+ break;
+ case Instruction::FAdd:
+ BuildFunc = [&]() {
+ auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+ return Rdx;
+ };
+ break;
+ case Instruction::FMul:
+ BuildFunc = [&]() {
+ auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
+ cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe);
+ return Rdx;
+ };
+ break;
+ case Instruction::ICmp:
+ if (Flags.IsMaxOp) {
+ MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
+ BuildFunc = [&]() {
+ return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
+ };
+ } else {
+ MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
+ BuildFunc = [&]() {
+ return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
+ };
+ }
+ break;
+ case Instruction::FCmp:
+ if (Flags.IsMaxOp) {
+ MinMaxKind = RD::MRK_FloatMax;
+ BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
+ } else {
+ MinMaxKind = RD::MRK_FloatMin;
+ BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
+ }
+ break;
+ default:
+ llvm_unreachable("Unhandled opcode");
+ break;
+ }
+ if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
+ return BuildFunc();
+ return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
+}
+
+/// Create a vector reduction using a given recurrence descriptor.
+Value *llvm::createTargetReduction(IRBuilder<> &Builder,
+ const TargetTransformInfo *TTI,
+ RecurrenceDescriptor &Desc, Value *Src,
+ bool NoNaN) {
+ // TODO: Support in-order reductions based on the recurrence descriptor.
+ RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind();
+ TargetTransformInfo::ReductionFlags Flags;
+ Flags.NoNaN = NoNaN;
+ auto getSimpleRdx = [&](unsigned Opc) {
+ return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags);
+ };
+ switch (RecKind) {
+ case RecurrenceDescriptor::RK_FloatAdd:
+ return getSimpleRdx(Instruction::FAdd);
+ case RecurrenceDescriptor::RK_FloatMult:
+ return getSimpleRdx(Instruction::FMul);
+ case RecurrenceDescriptor::RK_IntegerAdd:
+ return getSimpleRdx(Instruction::Add);
+ case RecurrenceDescriptor::RK_IntegerMult:
+ return getSimpleRdx(Instruction::Mul);
+ case RecurrenceDescriptor::RK_IntegerAnd:
+ return getSimpleRdx(Instruction::And);
+ case RecurrenceDescriptor::RK_IntegerOr:
+ return getSimpleRdx(Instruction::Or);
+ case RecurrenceDescriptor::RK_IntegerXor:
+ return getSimpleRdx(Instruction::Xor);
+ case RecurrenceDescriptor::RK_IntegerMinMax: {
+ switch (Desc.getMinMaxRecurrenceKind()) {
+ case RecurrenceDescriptor::MRK_SIntMax:
+ Flags.IsSigned = true;
+ Flags.IsMaxOp = true;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMax:
+ Flags.IsMaxOp = true;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMin:
+ Flags.IsSigned = true;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMin:
+ break;
+ default:
+ llvm_unreachable("Unhandled MRK");
+ }
+ return getSimpleRdx(Instruction::ICmp);
+ }
+ case RecurrenceDescriptor::RK_FloatMinMax: {
+ Flags.IsMaxOp =
+ Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax;
+ return getSimpleRdx(Instruction::FCmp);
+ }
+ default:
+ llvm_unreachable("Unhandled RecKind");
+ }
+}
+
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
+ auto *VecOp = dyn_cast<Instruction>(I);
+ if (!VecOp)
+ return;
+ auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
+ : dyn_cast<Instruction>(OpValue);
+ if (!Intersection)
+ return;
+ const unsigned Opcode = Intersection->getOpcode();
+ VecOp->copyIRFlags(Intersection);
+ for (auto *V : VL) {
+ auto *Instr = dyn_cast<Instruction>(V);
+ if (!Instr)
+ continue;
+ if (OpValue == nullptr || Opcode == Instr->getOpcode())
+ VecOp->andIRFlags(V);
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
new file mode 100644
index 000000000000..29756d9dab7f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -0,0 +1,323 @@
+//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility class to perform loop versioning. The versioned
+// loop speculates that memory accesses which may otherwise alias do not
+// overlap, and emits runtime checks to prove this.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+ cl::Hidden,
+ cl::desc("Add no-alias annotation for instructions that "
+ "are disambiguated by memchecks"));
+
+LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool UseLAIChecks)
+ : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
+ SE(SE) {
+ assert(L->getExitBlock() && "No single exit block");
+ assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form");
+ if (UseLAIChecks) {
+ setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
+ setSCEVChecks(LAI.getPSE().getUnionPredicate());
+ }
+}
+
+void LoopVersioning::setAliasChecks(
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
+ AliasChecks = std::move(Checks);
+}
+
+void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
+ Preds = std::move(Check);
+}
+
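+// Roughly, the produced CFG is: the runtime-check block conditionally branches
+// to the preheader of the non-versioned (original) loop when a check fails,
+// and to the preheader of the versioned loop otherwise; both loops then rejoin
+// at the original exit block.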
+void LoopVersioning::versionLoop(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ Value *SCEVRuntimeCheck;
+ Value *RuntimeCheck = nullptr;
+
+ // Add the memcheck in the original preheader (this is empty initially).
+ BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
+
+ const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
+ SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
+ "scev.check");
+ SCEVRuntimeCheck =
+ Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
+ auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
+
+ // Discard the SCEV runtime check if it is always true.
+ if (CI && CI->isZero())
+ SCEVRuntimeCheck = nullptr;
+
+ if (MemRuntimeCheck && SCEVRuntimeCheck) {
+ RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
+ SCEVRuntimeCheck, "lver.safe");
+ if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
+ I->insertBefore(RuntimeCheckBB->getTerminator());
+ } else
+ RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+ assert(RuntimeCheck && "called even though we don't need "
+ "any runtime checks");
+
+ // Rename the block to make the IR more readable.
+ RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+ ".lver.check");
+
+ // Create empty preheader for the loop (and after cloning for the
+ // non-versioned loop).
+ BasicBlock *PH =
+ SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
+ PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
+
+ // Clone the loop including the preheader.
+ //
+ // FIXME: This does not currently preserve SimplifyLoop because the exit
+ // block is a join between the two loops.
+ SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
+ NonVersionedLoop =
+ cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+ ".lver.orig", LI, DT, NonVersionedLoopBlocks);
+ remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
+
+ // Insert the conditional branch based on the result of the memchecks.
+ Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
+ BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
+ OrigTerm->eraseFromParent();
+
+ // The loops merge in the original exit block. This is now dominated by the
+ // memchecking block.
+ DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+ // Adds the necessary PHI nodes for the versioned loops based on the
+ // loop-defined values used outside of the loop.
+ addPHINodes(DefsUsedOutside);
+}
+
+void LoopVersioning::addPHINodes(
+ const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
+ assert(PHIBlock && "No single successor to loop exit block");
+ PHINode *PN;
+
+ // First add a single-operand PHI for each DefsUsedOutside if one does not
+ // exist yet.
+ for (auto *Inst : DefsUsedOutside) {
+ // See if we have a single-operand PHI with the value defined by the
+ // original loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ if (PN->getIncomingValue(0) == Inst)
+ break;
+ }
+ // If not, create it.
+ if (!PN) {
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
+ &PHIBlock->front());
+ for (auto *User : Inst->users())
+ if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
+ User->replaceUsesOfWith(Inst, PN);
+ PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
+ }
+ }
+
+ // Then for each PHI add the operand for the edge from the cloned loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+ "Exit block should only have on predecessor");
+
+ // If the definition was cloned, use the cloned value; otherwise use the
+ // original.
+ Value *ClonedValue = PN->getIncomingValue(0);
+ auto Mapped = VMap.find(ClonedValue);
+ if (Mapped != VMap.end())
+ ClonedValue = Mapped->second;
+
+ PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
+ }
+}
+
+void LoopVersioning::prepareNoAliasMetadata() {
+ // We need to turn the no-alias relation between pointer checking groups into
+ // no-aliasing annotations between instructions.
+ //
+ // We accomplish this by mapping each pointer checking group (a set of
+ // pointers memchecked together) to an alias scope and then also mapping each
+ // group to the list of scopes it can't alias.
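+ //
+ // For example, if groups A and B are runtime-checked against each other, a
+ // memory instruction whose pointer is in group A is annotated with
+ // !alias.scope naming A's scope and !noalias naming B's scope.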
+
+ const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+
+ // First allocate an aliasing scope for each pointer checking group.
+ //
+ // While traversing through the checking groups in the loop, also create a
+ // reverse map from pointers to the pointer checking group they were assigned
+ // to.
+ MDBuilder MDB(Context);
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+ for (const auto &Group : RtPtrChecking->CheckingGroups) {
+ GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+ for (unsigned PtrIdx : Group.Members)
+ PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+ }
+
+ // Go through the checks and for each pointer group, collect the scopes for
+ // each non-aliasing pointer group.
+ DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
+ SmallVector<Metadata *, 4>>
+ GroupToNonAliasingScopes;
+
+ for (const auto &Check : AliasChecks)
+ GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+ // Finally, transform the above to actually map to a scope list, which is
+ // what the metadata uses.
+
+ for (auto Pair : GroupToNonAliasingScopes)
+ GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
+}
+
+void LoopVersioning::annotateLoopWithNoAlias() {
+ if (!AnnotateNoAlias)
+ return;
+
+ // First prepare the maps.
+ prepareNoAliasMetadata();
+
+ // Add the scope and no-alias metadata to the instructions.
+ for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+ annotateInstWithNoAlias(I);
+ }
+}
+
+void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
+ const Instruction *OrigInst) {
+ if (!AnnotateNoAlias)
+ return;
+
+ LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+ const Value *Ptr = isa<LoadInst>(OrigInst)
+ ? cast<LoadInst>(OrigInst)->getPointerOperand()
+ : cast<StoreInst>(OrigInst)->getPointerOperand();
+
+ // Find the group for the pointer and then add the scope metadata.
+ auto Group = PtrToGroup.find(Ptr);
+ if (Group != PtrToGroup.end()) {
+ VersionedInst->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(Context, GroupToScope[Group->second])));
+
+ // Add the no-alias metadata.
+ auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
+ if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+ VersionedInst->setMetadata(
+ LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ VersionedInst->getMetadata(LLVMContext::MD_noalias),
+ NonAliasingScopeList->second));
+ }
+}
+
+namespace {
+/// \brief Also expose this as a pass. Currently this is only used for
+/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
+/// array accesses from the loop.
+class LoopVersioningPass : public FunctionPass {
+public:
+ LoopVersioningPass() : FunctionPass(ID) {
+ initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ // Build up a worklist of inner-loops to version. This is necessary as the
+ // act of versioning a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L);
+ if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ LoopVersioning LVer(LAI, L, LI, DT, SE);
+ LVer.versionLoop();
+ LVer.annotateLoopWithNoAlias();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessLegacyAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+#define LVER_OPTION "loop-versioning"
+#define DEBUG_TYPE LVER_OPTION
+
+char LoopVersioningPass::ID;
+static const char LVer_name[] = "Loop Versioning";
+
+INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopVersioningPass() {
+ return new LoopVersioningPass();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 000000000000..ee84541e526d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,94 @@
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass converts 'invoke' instructions to 'call'
+// instructions, so that any exception-handling 'landingpad' blocks become dead
+// code (which can be removed by running the '-simplifycfg' pass afterwards).
+//
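+// For example, a block ending in
+//
+//   invoke void @f() to label %cont unwind label %lpad
+//
+// is rewritten to
+//
+//   call void @f()
+//   br label %cont
+//
+// which leaves the %lpad landing pad unreachable.
+//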
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerinvoke"
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+
+namespace {
+ class LowerInvokeLegacyPass : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
+ initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+ };
+}
+
+char LowerInvokeLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
+
+static bool runImpl(Function &F) {
+ bool Changed = false;
+ for (BasicBlock &BB : F)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
+ SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(II->getCalledValue(), CallArgs, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Remove any PHI node entries from the exception destination.
+ II->getUnwindDest()->removePredecessor(&BB);
+
+ // Remove the invoke instruction now.
+ BB.getInstList().erase(II);
+
+ ++NumInvokes;
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
+ return runImpl(F);
+}
+
+namespace llvm {
+char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
+
+PreservedAnalyses LowerInvokePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = runImpl(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
new file mode 100644
index 000000000000..900450b40061
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -0,0 +1,510 @@
+//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static unsigned getLoopOperandSizeInBytes(Type *Type) {
+ if (VectorType *VTy = dyn_cast<VectorType>(Type)) {
+ return VTy->getBitWidth() / 8;
+ }
+
+ return Type->getPrimitiveSizeInBits() / 8;
+}
+
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+ Value *DstAddr, ConstantInt *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
+ // No need to expand zero length copies.
+ if (CopyLen->isZero())
+ return;
+
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB = nullptr;
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ Type *TypeOfCopyLen = CopyLen->getType();
+ Type *LoopOpType =
+ TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+
+ unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+ uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
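+ // E.g. a 15-byte copy with a 4-byte loop operand executes the loop three
+ // times (12 bytes), leaving 3 residual bytes for the cleanup code below.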
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ if (LoopEndCount != 0) {
+ // Split the block at the insertion point and create the main copy loop.
+ PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
+ PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ // Cast the Src and Dst pointers to pointers to the loop operand type (if
+ // needed).
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
+ // Loop Body
+ Value *SrcGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP =
+ LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ // Create the loop branch condition.
+ Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
+ LoopBB, PostLoopBB);
+ }
+
+ uint64_t BytesCopied = LoopEndCount * LoopOpSize;
+ uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
+ if (RemainingBytes) {
+ IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
+ : InsertBefore);
+
+ // Update the alignment based on the copy size used in the loop body.
+ SrcAlign = std::min(SrcAlign, LoopOpSize);
+ DestAlign = std::min(DestAlign, LoopOpSize);
+
+ SmallVector<Type *, 5> RemainingOps;
+ TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
+ SrcAlign, DestAlign);
+
+ for (auto OpTy : RemainingOps) {
+ // Calculate the new index.
+ unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
+ uint64_t GepIndex = BytesCopied / OperandSize;
+ assert(GepIndex * OperandSize == BytesCopied &&
+ "Division should have no Remainder!");
+ // Cast source to operand type and load
+ PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
+ Value *CastedSrc = SrcAddr->getType() == SrcPtrType
+ ? SrcAddr
+ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+ Value *SrcGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+
+ // Cast destination to operand type and store.
+ PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
+ Value *CastedDst = DstAddr->getType() == DstPtrType
+ ? DstAddr
+ : RBuilder.CreateBitCast(DstAddr, DstPtrType);
+ Value *DstGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ BytesCopied += OperandSize;
+ }
+ }
+ assert(BytesCopied == CopyLen->getZExtValue() &&
+ "Bytes copied should match size in the call!");
+}
+
+void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr,
+ Value *CopyLen, unsigned SrcAlign,
+ unsigned DestAlign, bool SrcIsVolatile,
+ bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB =
+ PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
+
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ Type *LoopOpType =
+ TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+ unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ // Calculate the loop trip count, and remaining bytes to copy after the loop.
+ Type *CopyLenType = CopyLen->getType();
+ IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
+ assert(ILengthType &&
+ "expected size argument to memcpy to be an integer type!");
+ ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
+ Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
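+ // E.g. for CopyLen == 23 and a 4-byte loop operand: loop count == 5,
+ // residual == 3, and the loop copies 20 bytes.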
+
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+ IRBuilder<> LoopBuilder(LoopBB);
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
+
+ Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ Type *Int8Type = Type::getInt8Ty(Ctx);
+ if (LoopOpType != Int8Type) {
+ // Loop body for the residual copy.
+ BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
+ PreLoopBB->getParent(), nullptr);
+ // Residual loop header.
+ BasicBlock *ResHeaderBB = BasicBlock::Create(
+ Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
+
+ // Need to update the pre-loop basic block to branch to the correct place:
+ // branch to the main loop if the count is non-zero, to the residual loop
+ // if the copy size is smaller than one iteration of the main loop but
+ // non-zero, and to after the residual loop if the memcpy size is zero.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, ResHeaderBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ ResHeaderBB);
+
+ // Determine if we need to branch to the residual loop or bypass it.
+ IRBuilder<> RHBuilder(ResHeaderBB);
+ RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
+ ResLoopBB, PostLoopBB);
+
+ // Copy the residual with single byte load/store loop.
+ IRBuilder<> ResBuilder(ResLoopBB);
+ PHINode *ResidualIndex =
+ ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
+ ResidualIndex->addIncoming(Zero, ResHeaderBB);
+
+ Value *SrcAsInt8 =
+ ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
+ Value *DstAsInt8 =
+ ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
+ Value *SrcGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
+ Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
+ ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *ResNewIndex =
+ ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+ ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+
+ // Create the loop branch condition.
+ ResBuilder.CreateCondBr(
+ ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+ PostLoopBB);
+ } else {
+ // In this case the loop operand type was a byte, and there is no need for a
+ // residual loop to copy the remaining memory after the main loop.
+ // We do, however, need to patch up the control flow by creating the
+ // terminators for the preloop block and the memcpy loop.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, PostLoopBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ PostLoopBB);
+ }
+}
+
+void llvm::createMemCpyLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
+ F, NewBB);
+
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // SrcAddr and DstAddr are expected to be pointer types,
+ // so no check is made here.
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ // Cast pointers to (char *)
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
+
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
+
+ // load from SrcAddr+LoopIndex
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from the backedge is (LoopIndex + 1).
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
+
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+static void createMemMoveLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+
+ // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = InsertBefore->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
+
+static void createMemSetLoop(Instruction *InsertBefore,
+ Value *DstAddr, Value *CopyLen, Value *SetValue,
+ unsigned Align, bool IsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ OrigBB->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB
+ = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
+
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // Cast pointer to the type of value getting stored
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
+
+ Builder.CreateCondBr(
+ Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
+ LoopBB);
+ OrigBB->getTerminator()->eraseFromParent();
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
+
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ IsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
+
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+ const TargetTransformInfo &TTI) {
+ // Original byte-wise lowering, used when the target does not opt in to the
+ // wide memcpy loop lowering.
+ if (!TTI.useWideIRMemcpyLoopLowering()) {
+ createMemCpyLoop(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile());
+ } else {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ } else {
+ createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ }
+ }
+}
+
+void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
+ createMemMoveLoop(/* InsertBefore */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcAlign */ Memmove->getAlignment(),
+ /* DestAlign */ Memmove->getAlignment(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile());
+}
+
+void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
+ createMemSetLoop(/* InsertBefore */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Alignment */ Memset->getAlignment(),
+ Memset->isVolatile());
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 000000000000..890afbc46e63
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,531 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch instructions with a sequence
+// of branches, which allows targets to get away with not implementing the
+// switch instruction until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "lower-switch"
+
+namespace {
+ struct IntRange {
+ int64_t Low, High;
+ };
+ // Return true iff R is covered by Ranges.
+ static bool IsInRanges(const IntRange &R,
+ const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
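+ // For example, R = [3, 4] is covered by Ranges = {[0, 5], [10, 20]},
+ // while R = [6, 8] is not.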
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), R,
+ [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ return I != Ranges.end() && I->Low <= R.Low;
+ }
+
+ /// Replace all SwitchInst instructions with chained branch instructions.
+ class LowerSwitch : public FunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ struct CaseRange {
+ ConstantInt* Low;
+ ConstantInt* High;
+ BasicBlock* BB;
+
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
+ };
+
+ typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange>::iterator CaseItr;
+ private:
+ void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
+
+ BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
+ ConstantInt *LowerBound, ConstantInt *UpperBound,
+ Value *Val, BasicBlock *Predecessor,
+ BasicBlock *OrigBlock, BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges);
+ BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
+ BasicBlock *Default);
+ unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
+ };
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+}
+
+char LowerSwitch::ID = 0;
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LowerSwitchID = LowerSwitch::ID;
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+ return new LowerSwitch();
+}
+
+bool LowerSwitch::runOnFunction(Function &F) {
+ bool Changed = false;
+ SmallPtrSet<BasicBlock*, 8> DeleteList;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
+
+ // If the block is a dead Default block that will be deleted later, don't
+ // waste time processing it.
+ if (DeleteList.count(Cur))
+ continue;
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ Changed = true;
+ processSwitchInst(SI, DeleteList);
+ }
+ }
+
+ for (BasicBlock* BB: DeleteList) {
+ DeleteDeadBlock(BB);
+ }
+
+ return Changed;
+}
+
+/// Used for debugging purposes.
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
+static raw_ostream& operator<<(raw_ostream &O,
+ const LowerSwitch::CaseVector &C) {
+ O << "[";
+
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+ E = C.end(); B != E; ) {
+ O << *B->Low << " -" << *B->High;
+ if (++B != E) O << ", ";
+ }
+
+ return O << "]";
+}
+
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outgoing edge into the same BB if they all have the same
+/// value. When the switch statement is converted these incoming edges are now
+/// coming from multiple BBs.
+/// 2) Removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outgoing edges are condensed into one. This is necessary to keep the
+/// number of phi values equal to the number of branches to SuccBB.
+static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ unsigned NumMergedCases) {
+ for (BasicBlock::iterator I = SuccBB->begin(),
+ IE = SuccBB->getFirstNonPHI()->getIterator();
+ I != IE; ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // Only update the first occurrence.
+ unsigned Idx = 0, E = PN->getNumIncomingValues();
+ unsigned LocalNumMergedCases = NumMergedCases;
+ for (; Idx != E; ++Idx) {
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ PN->setIncomingBlock(Idx, NewBB);
+ break;
+ }
+ }
+
+ // Remove additional occurrences coming from condensed cases and keep the
+ // number of incoming values equal to the number of branches to SuccBB.
+ SmallVector<unsigned, 8> Indices;
+ for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ Indices.push_back(Idx);
+ LocalNumMergedCases--;
+ }
+ // Remove incoming values in reverse order so that removing an entry does
+ // not invalidate the indices of the entries that follow it.
+ for (unsigned III : reverse(Indices))
+ PN->removeIncomingValue(III);
+ }
+}
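
The update-then-prune logic in fixPhis is easier to see on a plain container. Below is a minimal standalone sketch, not part of the imported source, that models a PHI's incoming list as (block, value) pairs; all names are hypothetical. It redirects the first entry from OrigBB and then erases up to NumMergedCases later duplicates back to front, for the same index-invalidation reason noted above.

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for a PHI node's incoming (block, value) list.
using IncomingList = std::vector<std::pair<std::string, int>>;

static void fixPhisSketch(IncomingList &Incoming, const std::string &OrigBB,
                          const std::string &NewBB, unsigned NumMergedCases) {
  // Redirect only the first occurrence of OrigBB to NewBB.
  auto It = std::find_if(Incoming.begin(), Incoming.end(),
                         [&](const auto &E) { return E.first == OrigBB; });
  if (It == Incoming.end())
    return;
  It->first = NewBB;

  // Collect up to NumMergedCases later occurrences of OrigBB...
  std::vector<size_t> Indices;
  for (size_t I = (It - Incoming.begin()) + 1;
       NumMergedCases > 0 && I < Incoming.size(); ++I)
    if (Incoming[I].first == OrigBB) {
      Indices.push_back(I);
      --NumMergedCases;
    }
  // ...and erase them back to front so earlier indices stay valid.
  for (auto RI = Indices.rbegin(); RI != Indices.rend(); ++RI)
    Incoming.erase(Incoming.begin() + *RI);
}
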
+
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to switchConvert in the call
+/// stack.
+BasicBlock *
+LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
+ unsigned Size = End - Begin;
+
+ if (Size == 1) {
+ // Check if the Case Range is perfectly squeezed in between
+ // already checked Upper and Lower bounds. If it is then we can avoid
+ // emitting the code that checks if the value actually falls in the range
+ // because the bounds already tell us so.
+ if (Begin->Low == LowerBound && Begin->High == UpperBound) {
+ unsigned NumMergedCases = 0;
+ if (LowerBound && UpperBound)
+ NumMergedCases =
+ UpperBound->getSExtValue() - LowerBound->getSExtValue();
+ fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
+ return Begin->BB;
+ }
+ return newLeafBlock(*Begin, Val, OrigBlock, Default);
+ }
+
+ unsigned Mid = Size / 2;
+ std::vector<CaseRange> LHS(Begin, Begin + Mid);
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
+ std::vector<CaseRange> RHS(Begin + Mid, End);
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
+
+ CaseRange &Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << Pivot.Low->getValue()
+ << " -" << Pivot.High->getValue() << "\n");
+
+ // NewLowerBound here should never be the minimal integer value. It is
+ // computed from a case range that is never the smallest, so there is
+ // always a case range with a smaller value.
+ ConstantInt *NewLowerBound = Pivot.Low;
+
+ // Because NewLowerBound is never the smallest representable integer
+ // it is safe here to subtract one.
+ ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+
+ if (!UnreachableRanges.empty()) {
+ // Check if the gap between LHS's highest and NewLowerBound is unreachable.
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1;
+ int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
+ IntRange Gap = { GapLow, GapHigh };
+ if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
+ NewUpperBound = LHS.back().High;
+ }
+
+ DEBUG(dbgs() << "LHS Bounds ==> ";
+ if (LowerBound) {
+ dbgs() << LowerBound->getSExtValue();
+ } else {
+ dbgs() << "NONE";
+ }
+ dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
+ dbgs() << "RHS Bounds ==> ";
+ dbgs() << NewLowerBound->getSExtValue() << " - ";
+ if (UpperBound) {
+ dbgs() << UpperBound->getSExtValue() << "\n";
+ } else {
+ dbgs() << "NONE\n";
+ });
+
+ // Create a new node that checks if the value is < pivot. Go to the
+ // left branch if it is and right branch if not.
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
+
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
+ Val, Pivot.Low, "Pivot");
+
+ BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
+ NewUpperBound, Val, NewNode, OrigBlock,
+ Default, UnreachableRanges);
+ BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
+ UpperBound, Val, NewNode, OrigBlock,
+ Default, UnreachableRanges);
+
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
+ NewNode->getInstList().push_back(Comp);
+
+ BranchInst::Create(LBranch, RBranch, Comp, NewNode);
+ return NewNode;
+}
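
What the recursion computes can be modeled without any IR at all. The following standalone sketch, with hypothetical types, evaluates the same balanced decision tree: pick the middle case range as the pivot, compare with a signed less-than, and recurse into the half that can still contain the value.

#include <cstdint>
#include <string>
#include <vector>

struct Range { int64_t Low, High; std::string BB; };

// Returns the label a lowered switch would branch to for Val, given case
// ranges sorted by Low. Mirrors the NodeBlock / ICMP_SLT structure above.
static std::string lowerLookup(const std::vector<Range> &Cases,
                               size_t Begin, size_t End, int64_t Val,
                               const std::string &Default) {
  size_t Size = End - Begin;
  if (Size == 1) {
    const Range &Leaf = Cases[Begin];
    // Leaf block: range membership test, else fall through to the default.
    return (Val >= Leaf.Low && Val <= Leaf.High) ? Leaf.BB : Default;
  }
  size_t Mid = Begin + Size / 2;
  // NodeBlock: "icmp slt Val, Pivot.Low" selects the left or right subtree.
  if (Val < Cases[Mid].Low)
    return lowerLookup(Cases, Begin, Mid, Val, Default);
  return lowerLookup(Cases, Mid, End, Val, Default);
}
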
+
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+ BasicBlock* OrigBlock,
+ BasicBlock* Default)
+{
+ Function* F = OrigBlock->getParent();
+ BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
+ F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
+
+ // Emit comparison
+ ICmpInst* Comp = nullptr;
+ if (Leaf.Low == Leaf.High) {
+ // Make the seteq instruction...
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
+ Leaf.Low, "SwitchLeaf");
+ } else {
+ // Make range comparison
+ if (Leaf.Low->isMinValue(true /*isSigned*/)) {
+ // Val >= Min && Val <= Hi --> Val <= Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else if (Leaf.Low->isZero()) {
+ // Val >= 0 && Val <= Hi --> Val <=u Hi
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
+ "SwitchLeaf");
+ } else {
+ // Emit V-Lo <=u Hi-Lo
+ Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+ Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
+ Val->getName()+".off",
+ NewLeaf);
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
+ "SwitchLeaf");
+ }
+ }
+
+ // Make the conditional branch...
+ BasicBlock* Succ = Leaf.BB;
+ BranchInst::Create(Succ, Default, Comp, NewLeaf);
+
+ // If there were any PHI nodes in this successor, rewrite one entry
+ // from OrigBlock to come from NewLeaf.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode* PN = cast<PHINode>(I);
+ // Remove all but one incoming entries from the cluster
+ uint64_t Range = Leaf.High->getSExtValue() -
+ Leaf.Low->getSExtValue();
+ for (uint64_t j = 0; j < Range; ++j) {
+ PN->removeIncomingValue(OrigBlock);
+ }
+
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+ }
+
+ return NewLeaf;
+}
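
The final else branch relies on the classic reduction of a two-sided range test to one unsigned comparison: V - Lo <=u Hi - Lo. A small self-checking C++ program, exercising values well inside the representable range so the subtraction itself cannot overflow, confirms the identity:

#include <cassert>
#include <cstdint>

// For Lo <= Hi, (Val >= Lo && Val <= Hi) equals the single unsigned test
// (Val - Lo) <=u (Hi - Lo): subtracting Lo rebases the range at zero, and
// values below Lo wrap around to huge unsigned numbers that fail the test.
static bool inRangeTwoCompares(int64_t Val, int64_t Lo, int64_t Hi) {
  return Val >= Lo && Val <= Hi;
}

static bool inRangeOneCompare(int64_t Val, int64_t Lo, int64_t Hi) {
  return (uint64_t)(Val - Lo) <= (uint64_t)(Hi - Lo);
}

int main() {
  for (int64_t Lo = -4; Lo <= 4; ++Lo)
    for (int64_t Hi = Lo; Hi <= 4; ++Hi)
      for (int64_t Val = -8; Val <= 8; ++Val)
        assert(inRangeTwoCompares(Val, Lo, Hi) ==
               inRangeOneCompare(Val, Lo, Hi));
  return 0;
}
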
+
+/// Transform the simple list of Cases into a list of CaseRanges.
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (auto Case : SI->cases())
+ Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge adjacent cases into clusters.
+ if (Cases.size() >= 2) {
+ CaseItr I = Cases.begin();
+ for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) {
+ int64_t nextValue = J->Low->getSExtValue();
+ int64_t currentValue = I->High->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ assert(nextValue > currentValue && "Cases should be strictly ascending");
+ if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ // FIXME: Combine branch weights.
+ } else if (++I != J) {
+ *I = *J;
+ }
+ }
+ Cases.erase(std::next(I), Cases.end());
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
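
Stripped of LLVM types, Clusterify is a linear in-place merge over a sorted case list. A standalone rendition with a hypothetical CaseRangeSketch type:

#include <cstdint>
#include <string>
#include <vector>

struct CaseRangeSketch { int64_t Low, High; std::string BB; };

// Merge sorted, strictly ascending cases whose values are contiguous and
// whose destinations match, compacting the vector in place.
static void clusterify(std::vector<CaseRangeSketch> &Cases) {
  if (Cases.size() < 2)
    return;
  size_t I = 0;
  for (size_t J = 1; J < Cases.size(); ++J) {
    if (Cases[J].Low == Cases[I].High + 1 && Cases[J].BB == Cases[I].BB)
      Cases[I].High = Cases[J].High;   // Extend the current cluster.
    else if (++I != J)
      Cases[I] = Cases[J];             // Start a new cluster, compacting.
  }
  Cases.resize(I + 1);
}

For example, cases 1, 2, and 3 that all target the same block collapse into the single range [1, 3], which then costs two compares instead of three.
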
+
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void LowerSwitch::processSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock*> &DeleteList) {
+ BasicBlock *CurBlock = SI->getParent();
+ BasicBlock *OrigBlock = CurBlock;
+ Function *F = CurBlock->getParent();
+ Value *Val = SI->getCondition(); // The value we are switching on...
+ BasicBlock* Default = SI->getDefaultDest();
+
+ // Don't handle unreachable blocks. If there are successors with phis, this
+ // would leave them behind with missing predecessors.
+ if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
+ CurBlock->getSinglePredecessor() == CurBlock) {
+ DeleteList.insert(CurBlock);
+ return;
+ }
+
+ // If there is only the default destination, just branch.
+ if (!SI->getNumCases()) {
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
+ return;
+ }
+
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ ConstantInt *LowerBound = nullptr;
+ ConstantInt *UpperBound = nullptr;
+ std::vector<IntRange> UnreachableRanges;
+
+ if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
+ // Make the bounds tightly fitted around the case value range, because we
+ // know that the value passed to the switch must be exactly one of the case
+ // values.
+ assert(!Cases.empty());
+ LowerBound = Cases.front().Low;
+ UpperBound = Cases.back().High;
+
+ DenseMap<BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ BasicBlock *PopSucc = nullptr;
+
+ IntRange R = { INT64_MIN, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ for (const auto &I : Cases) {
+ int64_t Low = I.Low->getSExtValue();
+ int64_t High = I.High->getSExtValue();
+
+ IntRange &LastRange = UnreachableRanges.back();
+ if (LastRange.Low == Low) {
+ // There is nothing left of the previous range.
+ UnreachableRanges.pop_back();
+ } else {
+ // Terminate the previous range.
+ assert(Low > LastRange.Low);
+ LastRange.High = Low - 1;
+ }
+ if (High != INT64_MAX) {
+ IntRange R = { High + 1, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ }
+
+ // Count popularity.
+ int64_t N = High - Low + 1;
+ unsigned &Pop = Popularity[I.BB];
+ if ((Pop += N) > MaxPop) {
+ MaxPop = Pop;
+ PopSucc = I.BB;
+ }
+ }
+#ifndef NDEBUG
+ /* UnreachableRanges should be sorted and the ranges non-adjacent. */
+ for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
+ I != E; ++I) {
+ assert(I->Low <= I->High);
+ auto Next = I + 1;
+ if (Next != E) {
+ assert(Next->Low > I->High);
+ }
+ }
+#endif
+
+ // Use the most popular block as the new default, reducing the number of
+ // cases.
+ assert(MaxPop > 0 && PopSucc);
+ Default = PopSucc;
+ Cases.erase(
+ remove_if(Cases,
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
+ Cases.end());
+
+ // If there are no cases left, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
+ return;
+ }
+ }
+
+ // Create a new, empty default block so that the new hierarchy of
+ // if-then statements goes to this block and the PHI nodes are happy.
+ BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
+ BranchInst::Create(Default, NewDefault);
+
+ // If there is an entry in any PHI nodes for the default edge, make sure
+ // to update them as well.
+ for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+ assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+ PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+ }
+
+ BasicBlock *SwitchBlock =
+ switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
+
+ // Branch to our shiny new if-then stuff...
+ BranchInst::Create(SwitchBlock, OrigBlock);
+
+ // We are now done with the switch instruction, delete it.
+ BasicBlock *OldDefault = SI->getDefaultDest();
+ CurBlock->getInstList().erase(SI);
+
+ // If the Default block has no more predecessors just add it to DeleteList.
+ if (pred_begin(OldDefault) == pred_end(OldDefault))
+ DeleteList.insert(OldDefault);
+}
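
One subtlety above: when the default block is unreachable, the switch value must be one of the case values, so the most frequently targeted successor can be promoted to the new default and its cases dropped. A standalone model of the popularity count, weighting each case by the number of values its range covers (names hypothetical):

#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct CaseSpan { int64_t Low, High; std::string BB; };

// Returns the successor covering the most case values; ties go to the
// successor seen first, matching the strict '>' comparison above.
static std::string mostPopularSuccessor(const std::vector<CaseSpan> &Cases) {
  std::map<std::string, uint64_t> Popularity;
  uint64_t MaxPop = 0;
  std::string PopSucc;
  for (const auto &C : Cases) {
    uint64_t N = (uint64_t)(C.High - C.Low + 1);
    uint64_t &Pop = Popularity[C.BB];
    if ((Pop += N) > MaxPop) {
      MaxPop = Pop;
      PopSucc = C.BB;
    }
  }
  return PopSucc;
}
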
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 000000000000..b659a2e4463f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,108 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mem2reg"
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
+ AssumptionCache &AC) {
+ std::vector<AllocaInst *> Allocas;
+ BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
+ bool Changed = false;
+
+ while (1) {
+ Allocas.clear();
+
+ // Find allocas that are safe to promote, by looking at all instructions in
+ // the entry node
+ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
+ if (isAllocaPromotable(AI))
+ Allocas.push_back(AI);
+
+ if (Allocas.empty())
+ break;
+
+ PromoteMemToReg(Allocas, DT, &AC);
+ NumPromoted += Allocas.size();
+ Changed = true;
+ }
+ return Changed;
+}
+
+PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ if (!promoteMemoryToRegister(F, DT, AC))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+namespace {
+struct PromoteLegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PromoteLegacyPass() : FunctionPass(ID) {
+ initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnFunction - To run this pass, first we calculate the alloca
+ // instructions that are safe for promotion, then we promote each one.
+ //
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ return promoteMemoryToRegister(F, DT, AC);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ }
+ };
+} // end of anonymous namespace
+
+char PromoteLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
+ "Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
+ false, false)
+
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+ return new PromoteLegacyPass();
+}
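
For reference, an out-of-tree client would typically schedule this pass through the legacy pass manager. A hedged sketch, assuming an already-populated Module and the createPromoteMemoryToRegisterPass declaration coming from llvm/Transforms/Scalar.h as included above:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

// Runs mem2reg over every function in M via the legacy pass manager.
static void runMem2Reg(Module &M) {
  legacy::PassManager PM;
  PM.add(createPromoteMemoryToRegisterPass());
  PM.run(M);
}
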
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
new file mode 100644
index 000000000000..9f2ad540c83d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -0,0 +1,161 @@
+//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass renames everything with metasyntactic names. The intent is to use
+// this pass after bugpoint reduction to conceal the nature of the original
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+
+ // This PRNG is from the ISO C spec. It is intentionally simple and
+ // unsuitable for cryptographic use. We're just looking for enough
+ // variety to surprise and delight users.
+ struct PRNG {
+ unsigned long next;
+
+ void srand(unsigned int seed) {
+ next = seed;
+ }
+
+ int rand() {
+ next = next * 1103515245 + 12345;
+ return (unsigned int)(next / 65536) % 32768;
+ }
+ };
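
This is the portable rand() reference implementation given in the ISO C standard, with the same multiplier 1103515245 and increment 12345. A standalone copy shows the determinism the renamer depends on; reseeding with the same value always replays the same name sequence:

#include <cstdio>

// Same linear congruential generator as above: next = next*1103515245+12345,
// returning 15-bit values. Identical seeds yield identical sequences, which
// is what keeps MetaRenamer deterministic for a given module.
int main() {
  unsigned long next = 42; // srand(42)
  for (int i = 0; i < 5; ++i) {
    next = next * 1103515245 + 12345;
    std::printf("%d\n", (int)((unsigned int)(next / 65536) % 32768));
  }
  return 0;
}
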
+
+ static const char *const metaNames[] = {
+ // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+ "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+ "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+ };
+
+ struct Renamer {
+ Renamer(unsigned int seed) {
+ prng.srand(seed);
+ }
+
+ const char *newName() {
+ return metaNames[prng.rand() % array_lengthof(metaNames)];
+ }
+
+ PRNG prng;
+ };
+
+ struct MetaRenamer : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ MetaRenamer() : ModulePass(ID) {
+ initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ // Seed our PRNG with a simple additive sum of the ModuleID. We just want
+ // to avoid always producing the same names while remaining deterministic.
+ unsigned int randSeed = 0;
+ for (auto C : M.getModuleIdentifier())
+ randSeed += C;
+
+ Renamer renamer(randSeed);
+
+ // Rename all aliases
+ for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ AI->setName("alias");
+ }
+
+ // Rename all global variables
+ for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
+ GI->setName("global");
+ }
+
+ // Rename all struct types
+ TypeFinder StructTypes;
+ StructTypes.run(M, true);
+ for (StructType *STy : StructTypes) {
+ if (STy->isLiteral() || STy->getName().empty()) continue;
+
+ SmallString<128> NameStorage;
+ STy->setName((Twine("struct.") +
+ renamer.newName()).toStringRef(NameStorage));
+ }
+
+ // Rename all functions
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ for (auto &F : M) {
+ StringRef Name = F.getName();
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ TLI.getLibFunc(F, Tmp))
+ continue;
+
+ F.setName(renamer.newName());
+ runOnFunction(F);
+ }
+ return true;
+ }
+
+ bool runOnFunction(Function &F) {
+ for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
+ if (!AI->getType()->isVoidTy())
+ AI->setName("arg");
+
+ for (auto &BB : F) {
+ BB.setName("bb");
+
+ for (auto &I : BB)
+ if (!I.getType()->isVoidTy())
+ I.setName("tmp");
+ }
+ return true;
+ }
+ };
+}
+
+char MetaRenamer::ID = 0;
+INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+//===----------------------------------------------------------------------===//
+//
+// MetaRenamer - Rename everything with metasyntactic names.
+//
+ModulePass *llvm::createMetaRenamerPass() {
+ return new MetaRenamer();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 000000000000..2ef3d6336ae2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,271 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static void appendToGlobalArray(const char *Array, Module &M, Function *F,
+ int Priority, Constant *Data) {
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ StructType *EltTy;
+ if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
+ ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
+ StructType *OldEltTy = cast<StructType>(ATy->getElementType());
+ // Upgrade a 2-field global array type to the new 3-field format if needed.
+ if (Data && OldEltTy->getNumElements() < 3)
+ EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+ IRB.getInt8PtrTy());
+ else
+ EltTy = OldEltTy;
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i) {
+ auto Ctor = cast<Constant>(Init->getOperand(i));
+ if (EltTy != OldEltTy)
+ Ctor =
+ ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
+ Ctor->getAggregateElement(1),
+ Constant::getNullValue(IRB.getInt8PtrTy()));
+ CurrentCtors.push_back(Ctor);
+ }
+ }
+ GVCtor->eraseFromParent();
+ } else {
+ // Use the new three-field struct if there isn't one already.
+ EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+ IRB.getInt8PtrTy());
+ }
+
+ // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+ Constant *CSVals[3];
+ CSVals[0] = IRB.getInt32(Priority);
+ CSVals[1] = F;
+ // FIXME: Drop support for the two element form in LLVM 4.0.
+ if (EltTy->getNumElements() >= 3)
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+ : Constant::getNullValue(IRB.getInt8PtrTy());
+ Constant *RuntimeCtorInit =
+ ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
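
A typical caller of appendToGlobalCtors is an instrumentation pass registering a module constructor. A hedged usage sketch follows; the function name and priority are illustrative, and the ctor body is left trivial:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

// Creates an internal void() function with an empty body and registers it
// in llvm.global_ctors at the default (lowest) priority.
static void addModuleCtor(Module &M) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *Ctor = Function::Create(FTy, GlobalValue::InternalLinkage,
                                    "my.module_ctor", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", Ctor);
  IRBuilder<> IRB(BB);
  IRB.CreateRetVoid();
  appendToGlobalCtors(M, Ctor, /*Priority=*/65535, /*Data=*/nullptr);
}
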
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
+}
+
+static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+ SmallPtrSet<Constant *, 16> InitAsSet;
+ SmallVector<Constant *, 16> Init;
+ if (GV) {
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ for (auto &Op : CA->operands()) {
+ Constant *C = cast_or_null<Constant>(Op);
+ if (InitAsSet.insert(C).second)
+ Init.push_back(C);
+ }
+ GV->eraseFromParent();
+ }
+
+ Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
+ for (auto *V : Values) {
+ Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
+ if (InitAsSet.insert(C).second)
+ Init.push_back(C);
+ }
+
+ if (Init.empty())
+ return;
+
+ ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
+ GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, Init), Name);
+ GV->setSection("llvm.metadata");
+}
+
+void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.used", Values);
+}
+
+void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.compiler.used", Values);
+}
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+ if (isa<Function>(FuncOrBitcast))
+ return cast<Function>(FuncOrBitcast);
+ FuncOrBitcast->print(errs());
+ errs() << '\n';
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+ report_fatal_error(Err);
+}
+
+Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes) {
+ assert(!InitName.empty() && "Expected init function name");
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ InitName,
+ FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
+ AttributeList()));
+ F->setLinkage(Function::ExternalLinkage);
+ return F;
+}
+
+std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ StringRef VersionCheckName) {
+ assert(!InitName.empty() && "Expected init function name");
+ assert(InitArgs.size() == InitArgTypes.size() &&
+ "Sanitizer's init function expects different number of arguments");
+ Function *InitFunction =
+ declareSanitizerInitFunction(M, InitName, InitArgTypes);
+ Function *Ctor = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::InternalLinkage, CtorName, &M);
+ BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
+ IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
+ IRB.CreateCall(InitFunction, InitArgs);
+ if (!VersionCheckName.empty()) {
+ Function *VersionCheckFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeList()));
+ IRB.CreateCall(VersionCheckFunction, {});
+ }
+ return std::make_pair(Ctor, InitFunction);
+}
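
Sanitizer passes use this helper to synthesize their module constructor and then register it. A hedged sketch of such a caller; the runtime symbol names here are illustrative, not the names any real sanitizer uses:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

// Builds a ctor whose body calls a declared __my_init() runtime entry point,
// then hooks the ctor into llvm.global_ctors.
static void installSanitizerCtor(Module &M) {
  std::pair<Function *, Function *> P = createSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"my.module_ctor", /*InitName=*/"__my_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{}, /*VersionCheckName=*/StringRef());
  appendToGlobalCtors(M, P.first, /*Priority=*/0, /*Data=*/nullptr);
}
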
+
+void llvm::filterDeadComdatFunctions(
+ Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
+ // Build a map from the comdat to the number of entries in that comdat we
+ // think are dead. If this fully covers the comdat group, then the entire
+ // group is dead. If we find another entry in the comdat group though, we'll
+ // have to preserve the whole group.
+ SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
+ for (Function *F : DeadComdatFunctions) {
+ Comdat *C = F->getComdat();
+ assert(C && "Expected all input GVs to be in a comdat!");
+ ComdatEntriesCovered[C] += 1;
+ }
+
+ auto CheckComdat = [&](Comdat &C) {
+ auto CI = ComdatEntriesCovered.find(&C);
+ if (CI == ComdatEntriesCovered.end())
+ return;
+
+ // If this could have been covered by a dead entry, just subtract one to
+ // account for it.
+ if (CI->second > 0) {
+ CI->second -= 1;
+ return;
+ }
+
+ // If we've already accounted for all the entries that were dead, the
+ // entire comdat is alive so remove it from the map.
+ ComdatEntriesCovered.erase(CI);
+ };
+
+ auto CheckAllComdats = [&] {
+ for (Function &F : M.functions())
+ if (Comdat *C = F.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ };
+ CheckAllComdats();
+
+ if (ComdatEntriesCovered.empty()) {
+ DeadComdatFunctions.clear();
+ return;
+ }
+
+ // Remove the entries that were not covering.
+ erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
+ return ComdatEntriesCovered.find(GV->getComdat()) ==
+ ComdatEntriesCovered.end();
+ });
+}
+
+std::string llvm::getUniqueModuleId(Module *M) {
+ MD5 Md5;
+ bool ExportsSymbols = false;
+ auto AddGlobal = [&](GlobalValue &GV) {
+ if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ !GV.hasExternalLinkage())
+ return;
+ ExportsSymbols = true;
+ Md5.update(GV.getName());
+ Md5.update(ArrayRef<uint8_t>{0});
+ };
+
+ for (auto &F : *M)
+ AddGlobal(F);
+ for (auto &GV : M->globals())
+ AddGlobal(GV);
+ for (auto &GA : M->aliases())
+ AddGlobal(GA);
+ for (auto &IF : M->ifuncs())
+ AddGlobal(IF);
+
+ if (!ExportsSymbols)
+ return "";
+
+ MD5::MD5Result R;
+ Md5.final(R);
+
+ SmallString<32> Str;
+ MD5::stringifyResult(R, Str);
+ return ("$" + Str).str();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
new file mode 100644
index 000000000000..34dc1cccdd5b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -0,0 +1,121 @@
+//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements naming anonymous globals to make sure they can be
+// referred to by ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+namespace {
+// Compute a "unique" hash for the module based on the name of the public
+// globals.
+class ModuleHasher {
+ Module &TheModule;
+ std::string TheHash;
+
+public:
+ ModuleHasher(Module &M) : TheModule(M) {}
+
+ /// Return the lazily computed hash.
+ std::string &get() {
+ if (!TheHash.empty())
+ // Cache hit :)
+ return TheHash;
+
+ MD5 Hasher;
+ for (auto &F : TheModule) {
+ if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName())
+ continue;
+ auto Name = F.getName();
+ Hasher.update(Name);
+ }
+ for (auto &GV : TheModule.globals()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName())
+ continue;
+ auto Name = GV.getName();
+ Hasher.update(Name);
+ }
+
+ // Now return the result.
+ MD5::MD5Result Hash;
+ Hasher.final(Hash);
+ SmallString<32> Result;
+ MD5::stringifyResult(Hash, Result);
+ TheHash = Result.str();
+ return TheHash;
+ }
+};
+} // end anonymous namespace
+
+// Rename all the anon globals in the module
+bool llvm::nameUnamedGlobals(Module &M) {
+ bool Changed = false;
+ ModuleHasher ModuleHash(M);
+ int count = 0;
+ auto RenameIfNeed = [&](GlobalValue &GV) {
+ if (GV.hasName())
+ return;
+ GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
+ Changed = true;
+ };
+ for (auto &GO : M.global_objects())
+ RenameIfNeed(GO);
+ for (auto &GA : M.aliases())
+ RenameIfNeed(GA);
+
+ return Changed;
+}
+
+namespace {
+
+// Legacy pass that provides a name to every anon global.
+class NameAnonGlobalLegacyPass : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ StringRef getPassName() const override { return "Name Anon Globals"; }
+
+ explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return nameUnamedGlobals(M); }
+};
+char NameAnonGlobalLegacyPass::ID = 0;
+
+} // anonymous namespace
+
+PreservedAnalyses NameAnonGlobalPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!nameUnamedGlobals(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals",
+ "Provide a name to nameless globals", false, false)
+INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals",
+ "Provide a name to nameless globals", false, false)
+
+namespace llvm {
+ModulePass *createNameAnonGlobalPass() {
+ return new NameAnonGlobalLegacyPass();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
new file mode 100644
index 000000000000..dc780542ce68
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -0,0 +1,32 @@
+//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility to check the dominance relation of two
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
+using namespace llvm;
+
+/// Given two instructions, use the OrderedBasicBlock to check the dominance
+/// relation if the instructions are in the same basic block; otherwise, use
+/// the dominator tree.
+bool OrderedInstructions::dominates(const Instruction *InstA,
+ const Instruction *InstB) const {
+ const BasicBlock *IBB = InstA->getParent();
+ // Use ordered basic block to do dominance check in case the 2 instructions
+ // are in the same basic block.
+ if (IBB == InstB->getParent()) {
+ auto OBB = OBBMap.find(IBB);
+ if (OBB == OBBMap.end())
+ OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+ return OBB->second->dominates(InstA, InstB);
+ }
+ return DT->dominates(InstA->getParent(), InstB->getParent());
+}
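
The OBBMap access above is a lazily-built cache: look up, construct on miss, then reuse. The same idiom in standalone form, with hypothetical key and value types:

#include <map>
#include <memory>

struct ExpensiveView {
  explicit ExpensiveView(int Key) : Key(Key) {}
  int Key;
};

// Returns the cached view for Key, constructing it on first use. Later
// queries reuse the same object, just as OBBMap reuses an OrderedBasicBlock
// for repeated same-block dominance queries.
static ExpensiveView &
getOrCreate(std::map<int, std::unique_ptr<ExpensiveView>> &Cache, int Key) {
  auto It = Cache.find(Key);
  if (It == Cache.end())
    It = Cache.emplace(Key, std::make_unique<ExpensiveView>(Key)).first;
  return *It->second;
}
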
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
new file mode 100644
index 000000000000..d4cdaede6b86
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -0,0 +1,793 @@
+//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the PredicateInfo class.
+//
+//===----------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
+#include <algorithm>
+#define DEBUG_TYPE "predicateinfo"
+using namespace llvm;
+using namespace PatternMatch;
+using namespace llvm::PredicateInfoClasses;
+
+INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+static cl::opt<bool> VerifyPredicateInfo(
+ "verify-predicateinfo", cl::init(false), cl::Hidden,
+ cl::desc("Verify PredicateInfo in legacy printer pass."));
+namespace {
+DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
+ "Controls which variables are renamed with predicateinfo")
+// Given a predicate info that is a type of branching terminator, get the
+// branching block.
+const BasicBlock *getBranchBlock(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Only branches and switches should have PHIOnly defs that "
+ "require branch blocks.");
+ return cast<PredicateWithEdge>(PB)->From;
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// branching terminator.
+static Instruction *getBranchTerminator(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get a terminator from.");
+ return cast<PredicateWithEdge>(PB)->From->getTerminator();
+}
+
+// Given a predicate info that is a type of branching terminator, get the
+// edge this predicate info represents
+const std::pair<BasicBlock *, BasicBlock *>
+getBlockEdge(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get an edge from.");
+ const auto *PEdge = cast<PredicateWithEdge>(PB);
+ return std::make_pair(PEdge->From, PEdge->To);
+}
+}
+
+namespace llvm {
+namespace PredicateInfoClasses {
+enum LocalNum {
+ // Operations that must appear first in the block.
+ LN_First,
+ // Operations that are somewhere in the middle of the block, and are sorted on
+ // demand.
+ LN_Middle,
+ // Operations that must appear last in a block, like successor phi node uses.
+ LN_Last
+};
+
+// Associate global and local DFS info with defs and uses, so we can sort them
+// into a global domination ordering.
+struct ValueDFS {
+ int DFSIn = 0;
+ int DFSOut = 0;
+ unsigned int LocalNum = LN_Middle;
+ // Only one of Def or Use will be set.
+ Value *Def = nullptr;
+ Use *U = nullptr;
+ // Neither PInfo nor EdgeOnly participate in the ordering
+ PredicateBase *PInfo = nullptr;
+ bool EdgeOnly = false;
+};
+
+// Perform a strict weak ordering on instructions and arguments.
+static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
+ const Value *B) {
+ auto *ArgA = dyn_cast_or_null<Argument>(A);
+ auto *ArgB = dyn_cast_or_null<Argument>(B);
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+ return OI.dominates(cast<Instruction>(A), cast<Instruction>(B));
+}
+
+// This compares ValueDFS structures, creating OrderedBasicBlocks where
+// necessary to compare uses/defs in the same block. Doing so allows us to walk
+// the minimum number of instructions necessary to compute our def/use ordering.
+struct ValueDFS_Compare {
+ OrderedInstructions &OI;
+ ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+
+ bool operator()(const ValueDFS &A, const ValueDFS &B) const {
+ if (&A == &B)
+ return false;
+ // The only case we can't directly compare them is when they are in the
+ // same block and both have localnum == middle. In that case, we have to
+ // use comesbefore to see what the real ordering is, because DFS numbers
+ // alone cannot order two positions within one basic block.
+
+ bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+
+ // We want to put the def that will get used for a given set of phi uses,
+ // before those phi uses.
+ // So we sort by edge, then by def.
+ // Note that only phi node uses and defs can come last.
+ if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+ return comparePHIRelated(A, B);
+
+ if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
+ return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
+ std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+ return localComesBefore(A, B);
+ }
+
+ // For a phi use, or a non-materialized def, return the edge it represents.
+ const std::pair<BasicBlock *, BasicBlock *>
+ getBlockEdge(const ValueDFS &VD) const {
+ if (!VD.Def && VD.U) {
+ auto *PHI = cast<PHINode>(VD.U->getUser());
+ return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
+ }
+ // This is really a non-materialized def.
+ return ::getBlockEdge(VD.PInfo);
+ }
+
+ // For two phi related values, return the ordering.
+ bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+ auto &ABlockEdge = getBlockEdge(A);
+ auto &BBlockEdge = getBlockEdge(B);
+ // Now sort by block edge and then defs before uses.
+ return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+ }
+
+ // Get the definition of an instruction that occurs in the middle of a block.
+ Value *getMiddleDef(const ValueDFS &VD) const {
+ if (VD.Def)
+ return VD.Def;
+ // It's possible for the defs and uses to be null. For branches, the local
+ // numbering will say the placed predicateinfos should go first (i.e.
+ // LN_First), so we won't be in this function. For assumes, we will end
+ // up here, because we need to order the def we will place relative to the
+ // assume. So for the purpose of ordering, we pretend the def is the assume
+ // because that is where we will insert the info.
+ if (!VD.U) {
+ assert(VD.PInfo &&
+ "No def, no use, and no predicateinfo should not occur");
+ assert(isa<PredicateAssume>(VD.PInfo) &&
+ "Middle of block should only occur for assumes");
+ return cast<PredicateAssume>(VD.PInfo)->AssumeInst;
+ }
+ return nullptr;
+ }
+
+ // Return either the Def, if it's not null, or the user of the Use, if the def
+ // is null.
+ const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
+ if (Def)
+ return cast<Instruction>(Def);
+ return cast<Instruction>(U->getUser());
+ }
+
+ // This performs the necessary local basic block ordering checks to tell
+ // whether A comes before B, where both are in the same basic block.
+ bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
+ auto *ADef = getMiddleDef(A);
+ auto *BDef = getMiddleDef(B);
+
+ // See if we have real values or uses. If we have real values, we are
+ // guaranteed they are instructions or arguments. No matter what, we are
+ // guaranteed they are in the same block if they are instructions.
+ auto *ArgA = dyn_cast_or_null<Argument>(ADef);
+ auto *ArgB = dyn_cast_or_null<Argument>(BDef);
+
+ if (ArgA || ArgB)
+ return valueComesBefore(OI, ArgA, ArgB);
+
+ auto *AInst = getDefOrUser(ADef, A.U);
+ auto *BInst = getDefOrUser(BDef, B.U);
+ return valueComesBefore(OI, AInst, BInst);
+ }
+};
+
+} // namespace PredicateInfoClasses
+
+bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
+ const ValueDFS &VDUse) const {
+ if (Stack.empty())
+ return false;
+ // If it's a phi only use, make sure it's for this phi node edge, and that the
+ // use is in a phi node. If it's anything else, and the top of the stack is
+ // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to
+ // the defs they must go with so that we can know it's time to pop the stack
+ // when we hit the end of the phi uses for a given def.
+ if (Stack.back().EdgeOnly) {
+ if (!VDUse.U)
+ return false;
+ auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
+ if (!PHI)
+ return false;
+ // Check edge
+ BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U);
+ if (EdgePred != getBranchBlock(Stack.back().PInfo))
+ return false;
+
+ // Use dominates, which knows how to handle edge dominance.
+ return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U);
+ }
+
+ return (VDUse.DFSIn >= Stack.back().DFSIn &&
+ VDUse.DFSOut <= Stack.back().DFSOut);
+}
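
The non-edge case of this scope test is the standard Euler-tour ancestor check: a def's scope covers a use iff the use's DFS interval nests inside the def's. A tiny self-checking illustration with hand-assigned intervals (updateDFSNumbers computes the real ones):

#include <cassert>

struct DFSInterval { int In, Out; };

// B lies in A's dominator-tree subtree iff B's interval nests inside A's.
static bool dominatesByDFS(DFSInterval A, DFSInterval B) {
  return B.In >= A.In && B.Out <= A.Out;
}

int main() {
  DFSInterval Root{0, 7}, Left{1, 4}, LeftChild{2, 3}, Right{5, 6};
  assert(dominatesByDFS(Root, LeftChild));
  assert(dominatesByDFS(Left, LeftChild));
  assert(!dominatesByDFS(Left, Right));     // Siblings do not nest.
  assert(!dominatesByDFS(LeftChild, Left)); // Containment is one-way.
  return 0;
}
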
+
+void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
+ const ValueDFS &VD) {
+ while (!Stack.empty() && !stackIsInScope(Stack, VD))
+ Stack.pop_back();
+}
+
+// Convert the uses of Op into a vector of uses, associating global and local
+// DFS info with each one.
+void PredicateInfo::convertUsesToDFSOrdered(
+ Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+ for (auto &U : Op->uses()) {
+ if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+ ValueDFS VD;
+ // Put the phi node uses in the incoming block.
+ BasicBlock *IBlock;
+ if (auto *PN = dyn_cast<PHINode>(I)) {
+ IBlock = PN->getIncomingBlock(U);
+ // Make phi node users appear last in the incoming block
+ // they are from.
+ VD.LocalNum = LN_Last;
+ } else {
+ // If it's not a phi node use, it is somewhere in the middle of the
+ // block.
+ IBlock = I->getParent();
+ VD.LocalNum = LN_Middle;
+ }
+ DomTreeNode *DomNode = DT.getNode(IBlock);
+ // It's possible our use is in an unreachable block. Skip it if so.
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.U = &U;
+ DFSOrderedSet.push_back(VD);
+ }
+ }
+}
+
+// Collect relevant operations from Comparison that we may want to insert copies
+// for.
+void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
+ auto *Op0 = Comparison->getOperand(0);
+ auto *Op1 = Comparison->getOperand(1);
+ if (Op0 == Op1)
+ return;
+ CmpOperands.push_back(Comparison);
+ // We only want real values, not constants. Additionally, operands with one
+ // use are only being used in the comparison, which means they will not be
+ // useful for us to consider for predicateinfo.
+ if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
+ CmpOperands.push_back(Op0);
+ if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
+ CmpOperands.push_back(Op1);
+}
+
+// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
+void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+ PredicateBase *PB) {
+ OpsToRename.insert(Op);
+ auto &OperandInfo = getOrCreateValueInfo(Op);
+ AllInfos.push_back(PB);
+ OperandInfo.Infos.push_back(PB);
+}
+
+// Process an assume instruction and place relevant operations we want to rename
+// into OpsToRename.
+void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ // See if we have a comparison we support
+ SmallVector<Value *, 8> CmpOperands;
+ SmallVector<Value *, 2> ConditionsToProcess;
+ CmpInst::Predicate Pred;
+ Value *Operand = II->getOperand(0);
+ if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))
+ .match(II->getOperand(0))) {
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
+ ConditionsToProcess.push_back(Operand);
+ } else if (isa<CmpInst>(Operand)) {
+ ConditionsToProcess.push_back(Operand);
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands) {
+ auto *PA = new PredicateAssume(Op, II, Cmp);
+ addInfoFor(OpsToRename, Op, PA);
+ }
+ CmpOperands.clear();
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // Otherwise, it should be an AND.
+ assert(BinOp->getOpcode() == Instruction::And &&
+ "Should have been an AND");
+ auto *PA = new PredicateAssume(BinOp, II, BinOp);
+ addInfoFor(OpsToRename, BinOp, PA);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ }
+}
+
+// Process a block terminating branch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ BasicBlock *FirstBB = BI->getSuccessor(0);
+ BasicBlock *SecondBB = BI->getSuccessor(1);
+ SmallVector<BasicBlock *, 2> SuccsToProcess;
+ SuccsToProcess.push_back(FirstBB);
+ SuccsToProcess.push_back(SecondBB);
+ SmallVector<Value *, 2> ConditionsToProcess;
+
+ auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
+ for (auto *Succ : SuccsToProcess) {
+ // Don't try to insert on a self-edge, mainly because we would just
+ // eliminate the copy during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+ bool TakenEdge = (Succ == FirstBB);
+ // For and, only insert on the true edge
+ // For or, only insert on the false edge
+ if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
+ continue;
+ PredicateBase *PB =
+ new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, Op, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ };
+
+ // Match combinations of conditions.
+ CmpInst::Predicate Pred;
+ bool isAnd = false;
+ bool isOr = false;
+ SmallVector<Value *, 8> CmpOperands;
+ if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))) ||
+ match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value())))) {
+ auto *BinOp = cast<BinaryOperator>(BI->getCondition());
+ if (BinOp->getOpcode() == Instruction::And)
+ isAnd = true;
+ else if (BinOp->getOpcode() == Instruction::Or)
+ isOr = true;
+ ConditionsToProcess.push_back(BinOp->getOperand(0));
+ ConditionsToProcess.push_back(BinOp->getOperand(1));
+ ConditionsToProcess.push_back(BI->getCondition());
+ } else if (isa<CmpInst>(BI->getCondition())) {
+ ConditionsToProcess.push_back(BI->getCondition());
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands)
+ InsertHelper(Op, isAnd, isOr, Cmp);
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // This must be an AND or an OR.
+ assert((BinOp->getOpcode() == Instruction::And ||
+ BinOp->getOpcode() == Instruction::Or) &&
+ "Should have been an AND or an OR");
+ // The actual value of the binop is not subject to the same restrictions
+ // as the comparison. It's either true or false on the true/false branch.
+ InsertHelper(BinOp, false, false, BinOp);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ CmpOperands.clear();
+ }
+}
+// Process a block terminating switch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ Value *Op = SI->getCondition();
+ if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
+ return;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = SI->getSuccessor(i);
+ ++SwitchEdges[TargetBlock];
+ }
+
+ // Now propagate info for each case value
+ for (auto C : SI->cases()) {
+ BasicBlock *TargetBlock = C.getCaseSuccessor();
+ if (SwitchEdges.lookup(TargetBlock) == 1) {
+ PredicateSwitch *PS = new PredicateSwitch(
+ Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI);
+ addInfoFor(OpsToRename, Op, PS);
+ if (!TargetBlock->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, TargetBlock});
+ }
+ }
+}
+
+// Build predicate info for our function
+void PredicateInfo::buildPredicateInfo() {
+ DT.updateDFSNumbers();
+ // Collect operands to rename from all conditional branch terminators, as well
+ // as assume statements.
+ SmallPtrSet<Value *, 8> OpsToRename;
+ for (auto DTN : depth_first(DT.getRootNode())) {
+ BasicBlock *BranchBB = DTN->getBlock();
+ if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
+ if (!BI->isConditional())
+ continue;
+ // Can't insert conditional information if they all go to the same place.
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ continue;
+ processBranch(BI, BranchBB, OpsToRename);
+ } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
+ processSwitch(SI, BranchBB, OpsToRename);
+ }
+ }
+ for (auto &Assume : AC.assumptions()) {
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
+ processAssume(II, II->getParent(), OpsToRename);
+ }
+ // Now rename all our operations.
+ renameUses(OpsToRename);
+}
+
+// Given the renaming stack, make all the operands currently on the stack real
+// by inserting them into the IR. Return the last operation's value.
+Value *PredicateInfo::materializeStack(unsigned int &Counter,
+ ValueDFSStack &RenameStack,
+ Value *OrigOp) {
+ // Find the first thing we have to materialize
+ auto RevIter = RenameStack.rbegin();
+ for (; RevIter != RenameStack.rend(); ++RevIter)
+ if (RevIter->Def)
+ break;
+
+ size_t Start = RevIter - RenameStack.rbegin();
+ // The maximum number of things we should be trying to materialize at once
+ // right now is 4: that covers an assume and a branch that both used an
+ // 'and' of conditions involving the same operand.
+ for (auto RenameIter = RenameStack.end() - Start;
+ RenameIter != RenameStack.end(); ++RenameIter) {
+ auto *Op =
+ RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
+ ValueDFS &Result = *RenameIter;
+ auto *ValInfo = Result.PInfo;
+ // For edge predicates, we can just place the operand in the block before
+ // the terminator. For assume, we have to place it right before the assume
+ // to ensure we dominate all of our uses. Always insert right before the
+ // relevant instruction (terminator, assume), so that we insert in proper
+ // order in the case of multiple predicateinfo in the same block.
+ if (isa<PredicateWithEdge>(ValInfo)) {
+ IRBuilder<> B(getBranchTerminator(ValInfo));
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC =
+ B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ } else {
+ auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
+ assert(PAssume &&
+ "Should not have gotten here without it being an assume");
+ IRBuilder<> B(PAssume->AssumeInst);
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC = B.CreateCall(IF, Op);
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ }
+ }
+ return RenameStack.back().Def;
+}
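
Materialization and the renaming loop that follows boil down to one pass over DFS-sorted events with a scope stack: pop entries whose interval no longer contains the current event, push defs, and let uses read the top of the stack. A compressed standalone model with a hypothetical event type:

#include <string>
#include <vector>

struct Event {
  int DFSIn, DFSOut;  // Dominator-tree interval of the event's block.
  bool IsDef;         // Defs push; uses read the top of the stack.
  std::string Name;
};

// Assigns each use the innermost def whose DFS interval contains it.
// Events must already be sorted in dominator-tree DFS order.
static std::vector<std::string>
renameUsesSketch(const std::vector<Event> &Events) {
  std::vector<Event> Stack;
  std::vector<std::string> UseDefs;
  for (const Event &E : Events) {
    // Pop defs whose scope we have left.
    while (!Stack.empty() && !(E.DFSIn >= Stack.back().DFSIn &&
                               E.DFSOut <= Stack.back().DFSOut))
      Stack.pop_back();
    if (E.IsDef)
      Stack.push_back(E);
    else
      UseDefs.push_back(Stack.empty() ? "<original>" : Stack.back().Name);
  }
  return UseDefs;
}
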
+
+// Instead of the standard SSA renaming algorithm, which is O(Number of
+// instructions), and walks the entire dominator tree, we walk only the defs +
+// uses. The standard SSA renaming algorithm does not really rely on the
+// dominator tree except to order the stack push/pops of the renaming stacks, so
+// that defs end up getting pushed before hitting the correct uses. This does
+// not require the dominator tree, only the *order* of the dominator tree. The
+// complete and correct ordering of the defs and uses, in dominator-tree order,
+// is contained in the DFS numbering of the dominator tree. So we sort the defs and
+// uses into the DFS ordering, and then just use the renaming stack as per
+// normal, pushing when we hit a def (which is a predicateinfo instruction),
+// popping when we are out of the dfs scope for that def, and replacing any uses
+// with top of stack if it exists. In order to handle liveness without
+// propagating liveness info, we don't actually insert the predicateinfo
+// instruction def until we see a use that it would dominate. Once we see such
+// a use, we materialize the predicateinfo instruction in the right place and
+// use it.
+//
+// TODO: Use this algorithm to perform fast single-variable renaming in
+// PromoteMemToReg and MemorySSA.
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
+ // Sort OpsToRename since we are going to iterate it.
+ SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
+ auto Comparator = [&](const Value *A, const Value *B) {
+ return valueComesBefore(OI, A, B);
+ };
+ std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ ValueDFS_Compare Compare(OI);
+ // Compute liveness, and rename in O(uses) per Op.
+ for (auto *Op : OpsToRename) {
+ unsigned Counter = 0;
+ SmallVector<ValueDFS, 16> OrderedUses;
+ const auto &ValueInfo = getValueInfo(Op);
+ // Insert the possible copies into the def/use list.
+    // They will become real copies if we find a real use for them, and are
+    // never created otherwise.
+ for (auto &PossibleCopy : ValueInfo.Infos) {
+ ValueDFS VD;
+ // Determine where we are going to place the copy by the copy type.
+      // The predicate info for branches always comes first; it will get
+      // materialized in the split block at the top of the block.
+      // The predicate info for assumes will be somewhere in the middle;
+      // it will get materialized in front of the assume.
+ if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
+ VD.LocalNum = LN_Middle;
+ DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ } else if (isa<PredicateWithEdge>(PossibleCopy)) {
+        // If we can only do phi uses, we treat it like it's in the branch
+        // block, and handle it specially. We know that it goes last, and only
+        // dominates phi uses.
+ auto BlockEdge = getBlockEdge(PossibleCopy);
+ if (EdgeUsesOnly.count(BlockEdge)) {
+ VD.LocalNum = LN_Last;
+ auto *DomNode = DT.getNode(BlockEdge.first);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ VD.EdgeOnly = true;
+ OrderedUses.push_back(VD);
+ }
+ } else {
+ // Otherwise, we are in the split block (even though we perform
+ // insertion in the branch block).
+ // Insert a possible copy at the split block and before the branch.
+ VD.LocalNum = LN_First;
+ auto *DomNode = DT.getNode(BlockEdge.second);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ }
+ }
+ }
+ }
+
+ convertUsesToDFSOrdered(Op, OrderedUses);
+ std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ SmallVector<ValueDFS, 8> RenameStack;
+    // For each use, sorted into dfs order, push values and replace uses with
+    // the top of the stack, which will represent the reaching def.
+ for (auto &VD : OrderedUses) {
+ // We currently do not materialize copy over copy, but we should decide if
+ // we want to.
+ bool PossibleCopy = VD.PInfo != nullptr;
+ if (RenameStack.empty()) {
+ DEBUG(dbgs() << "Rename Stack is empty\n");
+ } else {
+ DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
+ << RenameStack.back().DFSIn << ","
+ << RenameStack.back().DFSOut << ")\n");
+ }
+
+ DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
+ << VD.DFSOut << ")\n");
+
+ bool ShouldPush = (VD.Def || PossibleCopy);
+ bool OutOfScope = !stackIsInScope(RenameStack, VD);
+ if (OutOfScope || ShouldPush) {
+ // Sync to our current scope.
+ popStackUntilDFSScope(RenameStack, VD);
+ if (ShouldPush) {
+ RenameStack.push_back(VD);
+ }
+ }
+      // If we get to this point and the stack is empty, we must have a use
+      // with no renaming needed; just skip it.
+ if (RenameStack.empty())
+ continue;
+      // Skip defs; we only want to rename the uses.
+ if (VD.Def || PossibleCopy)
+ continue;
+ if (!DebugCounter::shouldExecute(RenameCounter)) {
+ DEBUG(dbgs() << "Skipping execution due to debug counter\n");
+ continue;
+ }
+ ValueDFS &Result = RenameStack.back();
+
+ // If the possible copy dominates something, materialize our stack up to
+ // this point. This ensures every comparison that affects our operation
+ // ends up with predicateinfo.
+ if (!Result.Def)
+ Result.Def = materializeStack(Counter, RenameStack, Op);
+
+ DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
+ << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n");
+ assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
+ "Predicateinfo def should have dominated this use");
+ VD.U->set(Result.Def);
+ }
+ }
+}
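+
+// Worked example (invented IR, true edge only): given
+//   %cmp = icmp eq i32 %x, 0
+//   br i1 %cmp, label %T, label %F
+// with a use of %x in %T, the walk above reaches the predicate def at the
+// DFS-in of %T before the use, pushes it, materializes
+//   %x.0 = call i32 @llvm.ssa.copy.i32(i32 %x)
+// before the branch, and rewrites the use in %T to %x.0. The def is popped
+// once the walk leaves the DFS subtree of %T, so uses in %F still see %x.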
+
+PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
+ auto OIN = ValueInfoNums.find(Operand);
+ if (OIN == ValueInfoNums.end()) {
+    // This will grow it by one slot.
+    ValueInfos.resize(ValueInfos.size() + 1);
+    // This will use the new size and give us a zero-based number for the info.
+ auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1});
+ assert(InsertResult.second && "Value info number already existed?");
+ return ValueInfos[InsertResult.first->second];
+ }
+ return ValueInfos[OIN->second];
+}
+
+const PredicateInfo::ValueInfo &
+PredicateInfo::getValueInfo(Value *Operand) const {
+ auto OINI = ValueInfoNums.lookup(Operand);
+ assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
+ assert(OINI < ValueInfos.size() &&
+ "Value Info Number greater than size of Value Info Table");
+ return ValueInfos[OINI];
+}
+
+PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F), DT(DT), AC(AC), OI(&DT) {
+ // Push an empty operand info so that we can detect 0 as not finding one
+ ValueInfos.resize(1);
+ buildPredicateInfo();
+}
+
+PredicateInfo::~PredicateInfo() {}
+
+void PredicateInfo::verifyPredicateInfo() const {}
+
+char PredicateInfoPrinterLegacyPass::ID = 0;
+
+PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
+ : FunctionPass(ID) {
+ initializePredicateInfoPrinterLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+}
+
+bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(dbgs());
+ if (VerifyPredicateInfo)
+ PredInfo->verifyPredicateInfo();
+ return false;
+}
+
+PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ OS << "PredicateInfo for function: " << F.getName() << "\n";
+ make_unique<PredicateInfo>(F, DT, AC)->print(OS);
+
+ return PreservedAnalyses::all();
+}
+
+/// \brief An assembly annotator class to print PredicateInfo information in
+/// comments.
+class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class PredicateInfo;
+ const PredicateInfo *PredInfo;
+
+public:
+ PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {}
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
+ OS << "; Has predicate info\n";
+ if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
+ OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
+ << " Comparison:" << *PB->Condition << " Edge: [";
+ PB->From->printAsOperand(OS);
+ OS << ",";
+ PB->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
+ OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
+ << " Switch:" << *PS->Switch << " Edge: [";
+ PS->From->printAsOperand(OS);
+ OS << ",";
+ PS->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
+ OS << "; assume predicate info {"
+ << " Comparison:" << *PA->Condition << " }\n";
+ }
+ }
+ }
+};
+
+void PredicateInfo::print(raw_ostream &OS) const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
+
+void PredicateInfo::dump() const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(dbgs(), &Writer);
+}
+
+PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+
+ return PreservedAnalyses::all();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 000000000000..cdba982e6641
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,1000 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promotes memory references to be register references. It promotes
+// alloca instructions which only have loads and stores as uses. An alloca is
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "mem2reg"
+
+STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
+STATISTIC(NumSingleStore, "Number of allocas promoted with a single store");
+STATISTIC(NumDeadAlloca, "Number of dead allocas removed");
+STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
+
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+ unsigned AS = AI->getType()->getAddressSpace();
+
+ // Only allow direct and non-volatile loads and stores...
+ for (const User *U : AI->users()) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
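+
+// A minimal usage sketch (illustrative only; collectPromotableAllocas is a
+// hypothetical helper, not part of this file):
+#if 0
+static void collectPromotableAllocas(Function &F,
+                                     SmallVectorImpl<AllocaInst *> &Out) {
+  // Promotable allocas conventionally live in the entry block.
+  for (Instruction &I : F.getEntryBlock())
+    if (auto *AI = dyn_cast<AllocaInst>(&I))
+      if (isAllocaPromotable(AI))
+        Out.push_back(AI);
+}
+#endif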
+
+namespace {
+
+struct AllocaInfo {
+ SmallVector<BasicBlock *, 32> DefiningBlocks;
+ SmallVector<BasicBlock *, 32> UsingBlocks;
+
+ StoreInst *OnlyStore;
+ BasicBlock *OnlyBlock;
+ bool OnlyUsedInOneBlock;
+
+ Value *AllocaPointerVal;
+ DbgDeclareInst *DbgDeclare;
+
+ void clear() {
+ DefiningBlocks.clear();
+ UsingBlocks.clear();
+ OnlyStore = nullptr;
+ OnlyBlock = nullptr;
+ OnlyUsedInOneBlock = true;
+ AllocaPointerVal = nullptr;
+ DbgDeclare = nullptr;
+ }
+
+ /// Scan the uses of the specified alloca, filling in the AllocaInfo used
+ /// by the rest of the pass to reason about the uses of this alloca.
+ void AnalyzeAlloca(AllocaInst *AI) {
+ clear();
+
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+        // Otherwise it must be a load instruction; keep track of variable
+        // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
+
+ if (OnlyUsedInOneBlock) {
+ if (!OnlyBlock)
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
+ OnlyUsedInOneBlock = false;
+ }
+ }
+
+ DbgDeclare = FindAllocaDbgDeclare(AI);
+ }
+};
+
+// Data package used by RenamePass()
+class RenamePassData {
+public:
+ typedef std::vector<Value *> ValVector;
+
+ RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
+ RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
+ : BB(B), Pred(P), Values(V) {}
+ BasicBlock *BB;
+ BasicBlock *Pred;
+ ValVector Values;
+
+ void swap(RenamePassData &RHS) {
+ std::swap(BB, RHS.BB);
+ std::swap(Pred, RHS.Pred);
+ Values.swap(RHS.Values);
+ }
+};
+
+/// \brief This assigns and keeps a per-bb relative ordering of load/store
+/// instructions in the block that directly load or store an alloca.
+///
+/// This functionality is important because it avoids scanning large basic
+/// blocks multiple times when promoting many allocas in the same block.
+class LargeBlockInfo {
+ /// \brief For each instruction that we track, keep the index of the
+ /// instruction.
+ ///
+ /// The index starts out as the number of the instruction from the start of
+ /// the block.
+ DenseMap<const Instruction *, unsigned> InstNumbers;
+
+public:
+
+ /// This code only looks at accesses to allocas.
+ static bool isInterestingInstruction(const Instruction *I) {
+ return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
+ (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
+ }
+
+ /// Get or calculate the index of the specified instruction.
+ unsigned getInstructionIndex(const Instruction *I) {
+ assert(isInterestingInstruction(I) &&
+ "Not a load/store to/from an alloca?");
+
+ // If we already have this instruction number, return it.
+ DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
+ if (It != InstNumbers.end())
+ return It->second;
+
+ // Scan the whole block to get the instruction. This accumulates
+ // information for every interesting instruction in the block, in order to
+    // avoid gratuitous rescans.
+ const BasicBlock *BB = I->getParent();
+ unsigned InstNo = 0;
+ for (const Instruction &BBI : *BB)
+ if (isInterestingInstruction(&BBI))
+ InstNumbers[&BBI] = InstNo++;
+ It = InstNumbers.find(I);
+
+ assert(It != InstNumbers.end() && "Didn't insert instruction?");
+ return It->second;
+ }
+
+ void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
+
+ void clear() { InstNumbers.clear(); }
+};
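+
+// Illustrative use (storePrecedesLoad is a hypothetical helper, not in this
+// file): answering "does this store precede that load in the same block?" in
+// amortized O(1) after the first scan of the block.
+#if 0
+static bool storePrecedesLoad(LargeBlockInfo &LBI, StoreInst *SI,
+                              LoadInst *LI) {
+  // Both instructions must directly access an alloca in the same block (see
+  // isInterestingInstruction above).
+  assert(SI->getParent() == LI->getParent() && "expected the same block");
+  return LBI.getInstructionIndex(SI) < LBI.getInstructionIndex(LI);
+}
+#endif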
+
+struct PromoteMem2Reg {
+ /// The alloca instructions being promoted.
+ std::vector<AllocaInst *> Allocas;
+ DominatorTree &DT;
+ DIBuilder DIB;
+ /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
+ AssumptionCache *AC;
+
+ const SimplifyQuery SQ;
+ /// Reverse mapping of Allocas.
+ DenseMap<AllocaInst *, unsigned> AllocaLookup;
+
+ /// \brief The PhiNodes we're adding.
+ ///
+ /// That map is used to simplify some Phi nodes as we iterate over it, so
+ /// it should have deterministic iterators. We could use a MapVector, but
+ /// since we already maintain a map from BasicBlock* to a stable numbering
+ /// (BBNumbers), the DenseMap is more efficient (also supports removal).
+ DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
+
+ /// For each PHI node, keep track of which entry in Allocas it corresponds
+ /// to.
+ DenseMap<PHINode *, unsigned> PhiToAllocaMap;
+
+ /// If we are updating an AliasSetTracker, then for each alloca that is of
+ /// pointer type, we keep track of what to copyValue to the inserted PHI
+ /// nodes here.
+ std::vector<Value *> PointerAllocaValues;
+
+ /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// describes it, if any, so that we can convert it to a dbg.value
+ /// intrinsic if the alloca gets promoted.
+ SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;
+
+ /// The set of basic blocks the renamer has already visited.
+ ///
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+  /// Contains a stable numbering of basic blocks to avoid non-deterministic
+ /// behavior.
+ DenseMap<BasicBlock *, unsigned> BBNumbers;
+
+ /// Lazily compute the number of predecessors a block has.
+ DenseMap<const BasicBlock *, unsigned> BBNumPreds;
+
+public:
+ PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AssumptionCache *AC)
+ : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+ DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
+ AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
+ nullptr, &DT, AC) {}
+
+ void run();
+
+private:
+ void RemoveFromAllocasList(unsigned &AllocaIdx) {
+ Allocas[AllocaIdx] = Allocas.back();
+ Allocas.pop_back();
+ --AllocaIdx;
+ }
+
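+  // The count is cached biased by one so that a default-constructed zero
+  // entry in BBNumPreds means "not computed yet", not "no predecessors".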
+ unsigned getNumPreds(const BasicBlock *BB) {
+ unsigned &NP = BBNumPreds[BB];
+ if (NP == 0)
+ NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
+ return NP - 1;
+ }
+
+ void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
+ void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncVals,
+ std::vector<RenamePassData> &Worklist);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+};
+
+} // end of anonymous namespace
+
+/// Given a LoadInst LI, this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+ Function *AssumeIntrinsic =
+ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+ Constant::getNullValue(LI->getType()));
+ LoadNotNull->insertAfter(LI);
+ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+ CI->insertAfter(LoadNotNull);
+ AC->registerAssumption(CI);
+}
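+
+// For illustration (invented IR): for an i32* load %v carrying !nonnull
+// metadata, the helper above leaves behind, right after the load:
+//   %notnull = icmp ne i32* %v, null
+//   call void @llvm.assume(i1 %notnull)
+// so that once %v is RAUW'd with the promoted value, the compare pins the
+// nonnull fact to that value.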
+
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+  // Knowing that this alloca is promotable, we know that it's safe to kill
+  // all instructions except for loads and stores.
+
+ for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
+ Instruction *Inst = cast<Instruction>(*UUI);
+ ++UUI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
+/// \brief Rewrite as many loads as possible given a single store.
+///
+/// When there is only a single store, we can use the domtree to trivially
+/// replace all of the dominated loads with the stored value. Do so, and return
+/// true if this has successfully promoted the alloca entirely. If this returns
+/// false there were some loads which were not dominated by the single store
+/// and thus must be phi-ed with undef. We fall back to the standard alloca
+/// promotion algorithm in that case.
+static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+ LargeBlockInfo &LBI, DominatorTree &DT,
+ AssumptionCache *AC) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+ int StoreIndex = -1;
+
+ // Clear out UsingBlocks. We will reconstruct it here if needed.
+ Info.UsingBlocks.clear();
+
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
+ Instruction *UserInst = cast<Instruction>(*UI++);
+ if (!isa<LoadInst>(UserInst)) {
+ assert(UserInst == OnlyStore && "Should only have load/stores");
+ continue;
+ }
+ LoadInst *LI = cast<LoadInst>(UserInst);
+
+ // Okay, if we have a load from the alloca, we want to replace it with the
+ // only value stored to the alloca. We can do this if the value is
+ // dominated by the store. If not, we use the rest of the mem2reg machinery
+ // to insert the phi nodes as needed.
+ if (!StoringGlobalVal) { // Non-instructions are always dominated.
+ if (LI->getParent() == StoreBB) {
+ // If we have a use that is in the same block as the store, compare the
+ // indices of the two instructions to see which one came first. If the
+ // load came before the store, we can't handle it.
+ if (StoreIndex == -1)
+ StoreIndex = LBI.getInstructionIndex(OnlyStore);
+
+ if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
+ // Can't handle this load, bail out.
+ Info.UsingBlocks.push_back(StoreBB);
+ continue;
+ }
+
+ } else if (LI->getParent() != StoreBB &&
+ !DT.dominates(StoreBB, LI->getParent())) {
+ // If the load and store are in different blocks, use BB dominance to
+ // check their relationships. If the store doesn't dom the use, bail
+ // out.
+ Info.UsingBlocks.push_back(LI->getParent());
+ continue;
+ }
+ }
+
+ // Otherwise, we *can* safely rewrite this load.
+ Value *ReplVal = OnlyStore->getOperand(0);
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
+
+ // If the load was marked as nonnull we don't want to lose
+ // that information when we erase this Load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
+ LI->replaceAllUsesWith(ReplVal);
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Finally, after the scan, check to see if the store is all that is left.
+ if (!Info.UsingBlocks.empty())
+ return false; // If not, we'll have to fall back for the remainder.
+
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
+ DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
+ // Remove the (now dead) store and alloca.
+ Info.OnlyStore->eraseFromParent();
+ LBI.deleteValue(Info.OnlyStore);
+
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+ return true;
+}
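+
+// Illustrative before/after (invented IR) for this single-store fast path:
+//   %a = alloca i32
+//   store i32 %x, i32* %a
+//   %v = load i32, i32* %a    ; dominated by the store
+//   call void @use(i32 %v)
+// becomes
+//   call void @use(i32 %x)
+// with the load, store, and alloca all erased.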
+
+/// Many allocas are only used within a single basic block. If this is the
+/// case, avoid traversing the CFG and inserting a lot of potentially useless
+/// PHI nodes by just performing a single linear pass over the basic block
+/// using the Alloca.
+///
+/// If we cannot promote this alloca (because it is read before it is written),
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths; e.g., code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+/// int t = *A;
+/// if (!first_iteration)
+/// use(t);
+/// *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+ DominatorTree &DT,
+ AssumptionCache *AC) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+ // make it efficient to get the index of various operations in the block.
+
+ // Walk the use-def list of the alloca, getting the locations of all stores.
+ typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
+ StoresByIndexTy StoresByIndex;
+
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
+ StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
+
+ // Sort the stores by their index, making it efficient to do a lookup with a
+ // binary search.
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+
+ // Walk all of the loads from this alloca, replacing them with the nearest
+ // store above them, if any.
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI++);
+ if (!LI)
+ continue;
+
+ unsigned LoadIdx = LBI.getInstructionIndex(LI);
+
+ // Find the nearest store that has a lower index than this load.
+ StoresByIndexTy::iterator I =
+ std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
+ std::make_pair(LoadIdx,
+ static_cast<StoreInst *>(nullptr)),
+ less_first());
+ if (I == StoresByIndex.begin()) {
+ if (StoresByIndex.empty())
+ // If there are no stores, the load takes the undef value.
+ LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ else
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+ } else {
+      // Otherwise, there was a store before this load; the load takes its value.
+ // Note, if the load was marked as nonnull we don't want to lose that
+ // information when we erase it. So we preserve it with an assume.
+ Value *ReplVal = std::prev(I)->second->getOperand(0);
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
+ LI->replaceAllUsesWith(ReplVal);
+ }
+
+ LI->eraseFromParent();
+ LBI.deleteValue(LI);
+ }
+
+ // Remove the (now dead) stores and alloca.
+ while (!AI->use_empty()) {
+ StoreInst *SI = cast<StoreInst>(AI->user_back());
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ }
+ SI->eraseFromParent();
+ LBI.deleteValue(SI);
+ }
+
+ AI->eraseFromParent();
+ LBI.deleteValue(AI);
+
+ // The alloca's debuginfo can be removed as well.
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+ DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
+
+ ++NumLocalPromoted;
+ return true;
+}
+
+void PromoteMem2Reg::run() {
+ Function &F = *DT.getRoot()->getParent();
+
+ AllocaDbgDeclares.resize(Allocas.size());
+
+ AllocaInfo Info;
+ LargeBlockInfo LBI;
+ ForwardIDFCalculator IDF(DT);
+
+ for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+ AllocaInst *AI = Allocas[AllocaNum];
+
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
+ assert(AI->getParent()->getParent() == &F &&
+ "All allocas should be in the same function, which is same as DF!");
+
+ removeLifetimeIntrinsicUsers(AI);
+
+ if (AI->use_empty()) {
+ // If there are no uses of the alloca, just delete it now.
+ AI->eraseFromParent();
+
+ // Remove the alloca from the Allocas list, since it has been processed
+ RemoveFromAllocasList(AllocaNum);
+ ++NumDeadAlloca;
+ continue;
+ }
+
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ ++NumSingleStore;
+ continue;
+ }
+ }
+
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock &&
+ promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
+ }
+
+    // If we haven't computed a numbering for the BBs in the function, do so
+ // now.
+ if (BBNumbers.empty()) {
+ unsigned ID = 0;
+ for (auto &BB : F)
+ BBNumbers[&BB] = ID++;
+ }
+
+ // Remember the dbg.declare intrinsic describing this alloca, if any.
+ if (Info.DbgDeclare)
+ AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
+
+ // Keep the reverse mapping of the 'Allocas' array for the rename pass.
+ AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need PHI
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ SmallVector<BasicBlock *, 32> PHIBlocks;
+ IDF.calculate(PHIBlocks);
+ if (PHIBlocks.size() > 1)
+ std::sort(PHIBlocks.begin(), PHIBlocks.end(),
+ [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
+ QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
+ }
+
+ if (Allocas.empty())
+ return; // All of the allocas must have been trivial!
+
+ LBI.clear();
+
+  // Set the incoming values for the basic block to be undef values for all of
+  // the allocas. We do this in case there is a load of a value that has not
+  // been stored yet. In this case, it will get an undef value.
+  //
+ RenamePassData::ValVector Values(Allocas.size());
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+ Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+  // Walk all basic blocks in the function, performing the SSA rename algorithm
+  // and inserting the phi nodes we marked as necessary.
+ //
+ std::vector<RenamePassData> RenamePassWorkList;
+ RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values));
+ do {
+ RenamePassData RPD;
+ RPD.swap(RenamePassWorkList.back());
+ RenamePassWorkList.pop_back();
+ // RenamePass may add new worklist entries.
+ RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
+ } while (!RenamePassWorkList.empty());
+
+ // The renamer uses the Visited set to avoid infinite loops. Clear it now.
+ Visited.clear();
+
+ // Remove the allocas themselves from the function.
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ Instruction *A = Allocas[i];
+
+ // If there are any uses of the alloca instructions left, they must be in
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
+ if (!A->use_empty())
+ A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ A->eraseFromParent();
+ }
+
+  // Remove the allocas' dbg.declare intrinsics from the function.
+ for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
+ DDI->eraseFromParent();
+
+ // Loop over all of the PHI nodes and see if there are any that we can get
+ // rid of because they merge all of the same incoming values. This can
+ // happen due to undef values coming into the PHI nodes. This process is
+ // iterative, because eliminating one PHI node can cause others to be removed.
+ bool EliminatedAPHI = true;
+ while (EliminatedAPHI) {
+ EliminatedAPHI = false;
+
+ // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+ // simplify and RAUW them as we go. If it was not, we could add uses to
+ // the values we replace with in a non-deterministic order, thus creating
+ // non-deterministic def->use chains.
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E;) {
+ PHINode *PN = I->second;
+
+ // If this PHI node merges one value and/or undefs, get the value.
+ if (Value *V = SimplifyInstruction(PN, SQ)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ NewPhiNodes.erase(I++);
+ EliminatedAPHI = true;
+ continue;
+ }
+ ++I;
+ }
+ }
+
+ // At this point, the renamer has added entries to PHI nodes for all reachable
+ // code. Unfortunately, there may be unreachable blocks which the renamer
+ // hasn't traversed. If this is the case, the PHI nodes may not
+ // have incoming values for all predecessors. Loop over all PHI nodes we have
+ // created, inserting undef values if they are missing any incoming values.
+ //
+ for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
+ I = NewPhiNodes.begin(),
+ E = NewPhiNodes.end();
+ I != E; ++I) {
+ // We want to do this once per basic block. As such, only process a block
+ // when we find the PHI that is the first entry in the block.
+ PHINode *SomePHI = I->second;
+ BasicBlock *BB = SomePHI->getParent();
+ if (&BB->front() != SomePHI)
+ continue;
+
+    // Only do work here if the PHI nodes are missing incoming values. We
+ // know that all PHI nodes that were inserted in a block will have the same
+ // number of incoming values, so we can just check any of them.
+ if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
+ continue;
+
+ // Get the preds for BB.
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+
+ // Ok, now we know that all of the PHI nodes are missing entries for some
+ // basic blocks. Start by sorting the incoming predecessors for efficient
+ // access.
+ std::sort(Preds.begin(), Preds.end());
+
+ // Now we loop through all BB's which have entries in SomePHI and remove
+ // them from the Preds list.
+ for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+ // Do a log(n) search of the Preds list for the entry we want.
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
+ "PHI node has entry for a block which is not a predecessor!");
+
+ // Remove the entry
+ Preds.erase(EntIt);
+ }
+
+    // At this point, the blocks left in the preds list must have dummy
+    // entries inserted into every PHI node for the block. Update all the phi
+ // nodes in this block that we are inserting (there could be phis before
+ // mem2reg runs).
+ unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+ BasicBlock::iterator BBI = BB->begin();
+ while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+ SomePHI->getNumIncomingValues() == NumBadPreds) {
+ Value *UndefVal = UndefValue::get(SomePHI->getType());
+ for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+ SomePHI->addIncoming(UndefVal, Preds[pred]);
+ }
+ }
+
+ NewPhiNodes.clear();
+}
+
+/// \brief Determine which blocks the value is live in.
+///
+/// These are blocks which lead to uses. Knowing this allows us to avoid
+/// inserting PHI nodes into blocks which don't lead to uses (thus, the
+/// inserted phi nodes would be dead).
+void PromoteMem2Reg::ComputeLiveInBlocks(
+ AllocaInst *AI, AllocaInfo &Info,
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
+
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
+
+ // If any of the using blocks is also a definition block, check to see if the
+ // definition occurs before or after the use. If it happens before the use,
+ // the value isn't really live-in.
+ for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
+ BasicBlock *BB = LiveInBlockWorklist[i];
+ if (!DefBlocks.count(BB))
+ continue;
+
+ // Okay, this is a block that both uses and defines the value. If the first
+ // reference to the alloca is a def (store), then we know it isn't live-in.
+ for (BasicBlock::iterator I = BB->begin();; ++I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) != AI)
+ continue;
+
+ // We found a store to the alloca before a load. The alloca is not
+ // actually live-in here.
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
+ LiveInBlockWorklist.pop_back();
+ --i;
+ --e;
+ break;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getOperand(0) != AI)
+ continue;
+
+ // Okay, we found a load before a store to the alloca. It is actually
+ // live into this block.
+ break;
+ }
+ }
+ }
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+    // The block really is live in here; insert it into the set. If it is
+    // already in the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor
+    // or live into it too. Add the preds to the worklist unless they are a
+ // defining block.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
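+
+// Worked example (hypothetical diamond CFG A -> {B, C} -> D): if the alloca
+// is stored in B and C and only loaded in D, the load makes D live-in, and
+// the walk stops at B and C because they are def blocks. The IDF of {B, C},
+// restricted to the live-in set, then places exactly one phi node, in D.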
+
+/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
+///
+/// Returns true if there wasn't already a phi-node for that variable
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+ unsigned &Version) {
+ // Look up the basic-block in question.
+ PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
+
+ // If the BB already has a phi node added for the i'th alloca then we're done!
+ if (PN)
+ return false;
+
+ // Create a PhiNode using the dereferenced type... and add the phi-node to the
+ // BasicBlock.
+ PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
+ Allocas[AllocaNo]->getName() + "." + Twine(Version++),
+ &BB->front());
+ ++NumPHIInsert;
+ PhiToAllocaMap[PN] = AllocaNo;
+ return true;
+}
+
+/// \brief Recursively traverse the CFG of the function, renaming loads and
+/// stores to the allocas which we are promoting.
+///
+/// IncomingVals indicates what value each Alloca contains on exit from the
+/// predecessor block Pred.
+void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
+ RenamePassData::ValVector &IncomingVals,
+ std::vector<RenamePassData> &Worklist) {
+NextIteration:
+ // If we are inserting any phi nodes into this BB, they will already be in the
+ // block.
+ if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
+ // If we have PHI nodes to update, compute the number of edges from Pred to
+ // BB.
+ if (PhiToAllocaMap.count(APN)) {
+ // We want to be able to distinguish between PHI nodes being inserted by
+ // this invocation of mem2reg from those phi nodes that already existed in
+ // the IR before mem2reg was run. We determine that APN is being inserted
+ // because it is missing incoming edges. All other PHI nodes being
+ // inserted by this pass of mem2reg will have the same number of incoming
+ // operands so far. Remember this count.
+ unsigned NewPHINumOperands = APN->getNumOperands();
+
+ unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
+ assert(NumEdges && "Must be at least one edge from Pred to BB!");
+
+ // Add entries for all the phis.
+ BasicBlock::iterator PNI = BB->begin();
+ do {
+ unsigned AllocaNo = PhiToAllocaMap[APN];
+
+ // Add N incoming values to the PHI node.
+ for (unsigned i = 0; i != NumEdges; ++i)
+ APN->addIncoming(IncomingVals[AllocaNo], Pred);
+
+ // The currently active variable for this block is now the PHI.
+ IncomingVals[AllocaNo] = APN;
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo])
+ ConvertDebugDeclareToDebugValue(DDI, APN, DIB);
+
+ // Get the next phi node.
+ ++PNI;
+ APN = dyn_cast<PHINode>(PNI);
+ if (!APN)
+ break;
+
+ // Verify that it is missing entries. If not, it is not being inserted
+ // by this mem2reg invocation so we want to ignore it.
+ } while (APN->getNumOperands() == NewPHINumOperands);
+ }
+ }
+
+ // Don't revisit blocks.
+ if (!Visited.insert(BB).second)
+ return;
+
+ for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
+ Instruction *I = &*II++; // get the instruction, increment iterator
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
+ if (!Src)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
+ if (AI == AllocaLookup.end())
+ continue;
+
+ Value *V = IncomingVals[AI->second];
+
+ // If the load was marked as nonnull we don't want to lose
+ // that information when we erase this Load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(V, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ BB->getInstList().erase(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Delete this instruction and mark the name as the current holder of the
+ // value
+ AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
+ if (!Dest)
+ continue;
+
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ if (ai == AllocaLookup.end())
+ continue;
+
+ // what value were we writing?
+ IncomingVals[ai->second] = SI->getOperand(0);
+ // Record debuginfo for the store before removing it.
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
+ ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+ BB->getInstList().erase(SI);
+ }
+ }
+
+ // 'Recurse' to our successors.
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E)
+ return;
+
+ // Keep track of the successors so we don't visit the same successor twice
+ SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
+
+ // Handle the first successor without using the worklist.
+ VisitedSuccs.insert(*I);
+ Pred = BB;
+ BB = *I;
+ ++I;
+
+ for (; I != E; ++I)
+ if (VisitedSuccs.insert(*I).second)
+ Worklist.emplace_back(*I, Pred, IncomingVals);
+
+ goto NextIteration;
+}
+
+void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
+ AssumptionCache *AC) {
+ // If there is nothing to do, bail out...
+ if (Allocas.empty())
+ return;
+
+ PromoteMem2Reg(Allocas, DT, AC).run();
+}
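+
+// A minimal driver sketch (illustrative; runMem2RegOnce is a hypothetical
+// wrapper, not part of this file):
+#if 0
+static bool runMem2RegOnce(Function &F, DominatorTree &DT,
+                           AssumptionCache *AC) {
+  SmallVector<AllocaInst *, 8> Allocas;
+  // Allocas eligible for promotion conventionally sit in the entry block.
+  for (Instruction &I : F.getEntryBlock())
+    if (auto *AI = dyn_cast<AllocaInst>(&I))
+      if (isAllocaPromotable(AI))
+        Allocas.push_back(AI);
+  if (Allocas.empty())
+    return false;
+  PromoteMemToReg(Allocas, DT, AC);
+  return true;
+}
+#endif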
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
new file mode 100644
index 000000000000..6ccf54e49dd3
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -0,0 +1,495 @@
+//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <cassert>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ssaupdater"
+
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+ : InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
+ if (!AV)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+ ProtoType = Ty;
+ ProtoName = Name;
+}
+
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+ assert(ProtoType && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
+ "All rewritten values must have the same type");
+ getAvailableVals(AV)[BB] = V;
+}
+
+static bool IsEquivalentPHI(PHINode *PHI,
+ SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
+ unsigned PHINumValues = PHI->getNumIncomingValues();
+ if (PHINumValues != ValueMapping.size())
+ return false;
+
+ // Scan the phi to see if it matches.
+ for (unsigned i = 0, e = PHINumValues; i != e; ++i)
+ if (ValueMapping[PHI->getIncomingBlock(i)] !=
+ PHI->getIncomingValue(i)) {
+ return false;
+ }
+
+ return true;
+}
+
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+ Value *Res = GetValueAtEndOfBlockInternal(BB);
+ return Res;
+}
+
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlock(BB);
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
+ Value *SingularValue = nullptr;
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (i == 0)
+ SingularValue = PredVal;
+ else if (PredVal != SingularValue)
+ SingularValue = nullptr;
+ }
+ } else {
+ bool isFirstPred = true;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBB = *PI;
+ Value *PredVal = GetValueAtEndOfBlock(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = nullptr;
+ }
+ }
+
+ // If there are no predecessors, just return undef.
+ if (PredValues.empty())
+ return UndefValue::get(ProtoType);
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue)
+ return SingularValue;
+
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
+
+ // Ok, we have no way out, insert a new one now.
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+ ProtoName, &BB->front());
+
+ // Fill in all the predecessors of the PHI.
+ for (const auto &PredValue : PredValues)
+ InsertedPHI->addIncoming(PredValue.second, PredValue.first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (Value *V =
+ SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
+ InsertedPHI->eraseFromParent();
+ return V;
+ }
+
+ // Set the DebugLoc of the inserted PHI, if available.
+ DebugLoc DL;
+ if (const Instruction *I = BB->getFirstNonPHI())
+ DL = I->getDebugLoc();
+ InsertedPHI->setDebugLoc(DL);
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI;
+}
+
+void SSAUpdater::RewriteUse(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueInMiddleOfBlock(User->getParent());
+
+  // Notify the users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
+ U.set(V);
+}
+
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
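+
+// A minimal client sketch (illustrative; names are hypothetical): rewriting
+// scattered uses of a variable that has one known definition per def block.
+#if 0
+static void rewriteWithSSAUpdater(
+    Type *Ty, ArrayRef<std::pair<BasicBlock *, Value *>> Defs,
+    ArrayRef<Use *> Uses) {
+  SSAUpdater Updater;
+  Updater.Initialize(Ty, "promoted");
+  for (const auto &Def : Defs)
+    Updater.AddAvailableValue(Def.first, Def.second);
+  for (Use *U : Uses)
+    Updater.RewriteUse(*U); // Inserts PHI nodes across blocks as needed.
+}
+#endif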
+
+namespace llvm {
+
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ class PHI_iterator {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHI_iterator(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHI_iterator &operator++() { ++idx; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
+
+ static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+  /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+  /// vector.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ Preds->append(SomePhi->block_begin(), SomePhi->block_end());
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+ }
+
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->ProtoType);
+ }
+
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+ Updater->ProtoName, &BB->front());
+ return PHI;
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
+ }
+
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
+ }
+};
+
+} // end namespace llvm
+
+/// Check to see if AvailableVals has an entry for the specified BB and if so,
+/// return it. If not, construct SSA form by first calculating the required
+/// placement of PHIs and then inserting new PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ const Value *SomeVal;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock;
+
+ for (Instruction *User : Insts)
+ UsesByBlock[User->getParent()].push_back(User);
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+  // Walk the uses in the use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (Instruction *User : Insts) {
+ BasicBlock *BB = User->getParent();
+ TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ updateDebugInfo(SI);
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ } else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (Instruction *I : BlockUses) {
+ if (isa<StoreInst>(I)) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live-in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live-ins.
+ if (!HasStore) {
+ for (Instruction *I : BlockUses)
+ LiveInLoads.push_back(cast<LoadInst>(I));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = nullptr;
+ for (Instruction &I : *BB) {
+ if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(SI, Insts)) continue;
+ updateDebugInfo(SI);
+
+ // Remember that this is the active value in the block.
+ StoredValue = SI->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values, inserting PHI
+ // nodes as necessary.
+ for (LoadInst *ALoad : LiveInLoads) {
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do any extra rewrites before we start deleting the
+ // old instructions.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (Instruction *User : Insts) {
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts)
+ const {
+ return is_contained(Insts, I);
+}
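+
+// A sketch of a hypothetical client (the subclass name is illustrative; the
+// hooks are the ones run() invokes above): collect the loads and stores of
+// one memory location, then let the promoter rewrite them via the updater.
+//
+//   struct Promoter : LoadAndStorePromoter {
+//     Promoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S)
+//         : LoadAndStorePromoter(Insts, S, "promoted") {}
+//     void instructionDeleted(Instruction *I) const override {
+//       // e.g. drop I from any client-side tables.
+//     }
+//   };
+//   Promoter(Insts, SSA).run(Worklist); // Worklist holds the same insts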
diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp
new file mode 100644
index 000000000000..8c23957ac43e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp
@@ -0,0 +1,108 @@
+//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements code generation for sanitizer statistics gathering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SanitizerStats.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
+ StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
+ EmptyModuleStatsTy = makeModuleStatsTy();
+
+ ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
+ GlobalValue::InternalLinkage, nullptr);
+}
+
+ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
+ return ArrayType::get(StatTy, Inits.size());
+}
+
+StructType *SanitizerStatReport::makeModuleStatsTy() {
+ return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
+ Type::getInt32Ty(M->getContext()),
+ makeModuleStatsArrayTy()});
+}
+
+void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
+ Function *F = B.GetInsertBlock()->getParent();
+ Module *M = F->getParent();
+ PointerType *Int8PtrTy = B.getInt8PtrTy();
+ IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
+ ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
+
+ Inits.push_back(ConstantArray::get(
+ StatTy,
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
+ kSanitizerStatKindBits)),
+ Int8PtrTy)}));
+
+ FunctionType *StatReportTy =
+ FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
+ Constant *StatReport = M->getOrInsertFunction(
+ "__sanitizer_stat_report", StatReportTy);
+
+ auto InitAddr = ConstantExpr::getGetElementPtr(
+ EmptyModuleStatsTy, ModuleStatsGV,
+ ArrayRef<Constant *>{
+ ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
+ ConstantInt::get(IntPtrTy, Inits.size() - 1),
+ });
+ B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
+}
+
+void SanitizerStatReport::finish() {
+ if (Inits.empty()) {
+ ModuleStatsGV->eraseFromParent();
+ return;
+ }
+
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+
+ // Create a new ModuleStatsGV to replace the old one. We can't just set the
+ // old one's initializer because its type is different.
+ auto NewModuleStatsGV = new GlobalVariable(
+ *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage,
+ ConstantStruct::getAnon(
+ {Constant::getNullValue(Int8PtrTy),
+ ConstantInt::get(Int32Ty, Inits.size()),
+ ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
+ ModuleStatsGV->replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
+ ModuleStatsGV->eraseFromParent();
+
+ // Create a global constructor to register NewModuleStatsGV.
+ auto F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage, "", M);
+ auto BB = BasicBlock::Create(M->getContext(), "", F);
+ IRBuilder<> B(BB);
+
+ FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
+ Constant *StatInit = M->getOrInsertFunction(
+ "__sanitizer_stat_init", StatInitTy);
+
+ B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
+ B.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+}
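+
+// Schematically, with N recorded stats the emitted IR looks like (a sketch;
+// the layout follows makeModuleStatsTy above):
+//
+//   @stats = internal global { i8*, i32, [N x [2 x i8*]] } { null, N, ... }
+//   define internal void @ctor() {
+//     call void @__sanitizer_stat_init(i8* bitcast (... @stats to i8*))
+//     ret void
+//   }
+//
+// Each [2 x i8*] entry holds a null first slot and a second slot whose top
+// kSanitizerStatKindBits encode the SanitizerStatKind.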
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 000000000000..8784b9702141
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,5998 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "simplifycfg"
+
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
+static cl::opt<unsigned> PHINodeFoldingThreshold(
+ "phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc(
+ "Control the amount of phi node folding to perform (default = 2)"));
+
+static cl::opt<bool> DupRet(
+ "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
+static cl::opt<bool>
+ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+static cl::opt<bool> HoistCondStores(
+ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store precedes"));
+
+static cl::opt<bool> MergeCondStores(
+ "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores even if an unconditional store does not "
+ "precede - hoist multiple conditional stores into a single "
+ "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+ "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+ cl::desc("When merging conditional stores, do so even if the resultant "
+ "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+ "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+ cl::desc("Allow exactly one expensive instruction to be speculatively "
+ "executed"));
+
+static cl::opt<unsigned> MaxSpeculationDepth(
+ "max-speculation-depth", cl::Hidden, cl::init(10),
+ cl::desc("Limit maximum recursion depth when calculating costs of "
+ "speculatively executed instructions"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLinearMaps,
+ "Number of switch instructions turned into linear mapping");
+STATISTIC(NumLookupTables,
+ "Number of switch instructions turned into lookup tables");
+STATISTIC(
+ NumLookupTablesHoles,
+ "Number of switch instructions turned into lookup tables (holes checked)");
+STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
+STATISTIC(NumSinkCommons,
+ "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
+
+namespace {
+
+// The first field contains the value that the switch produces when a certain
+// case group is selected, and the second field is a vector containing the
+// cases composing the case group.
+typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
+ SwitchCaseResultVectorTy;
+// The first field contains the phi node that generates a result of the switch
+// and the second field contains the value generated for a certain case in the
+// switch for that PHI.
+typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
+
+/// ValueEqualityComparisonCase - Represents a case of a switch.
+struct ValueEqualityComparisonCase {
+ ConstantInt *Value;
+ BasicBlock *Dest;
+
+ ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
+ : Value(Value), Dest(Dest) {}
+
+ bool operator<(ValueEqualityComparisonCase RHS) const {
+ // Comparing pointers is ok as we only rely on the order for uniquing.
+ return Value < RHS.Value;
+ }
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
+};
+
+class SimplifyCFGOpt {
+ const TargetTransformInfo &TTI;
+ const DataLayout &DL;
+ unsigned BonusInstThreshold;
+ AssumptionCache *AC;
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
+ // See comments in SimplifyCFGOpt::SimplifySwitch.
+ bool LateSimplifyCFG;
+ Value *isValueEqualityComparison(TerminatorInst *TI);
+ BasicBlock *GetValueEqualityComparisonCases(
+ TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ BasicBlock *Pred,
+ IRBuilder<> &Builder);
+ bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder);
+
+ bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifySingleResume(ResumeInst *RI);
+ bool SimplifyCommonResume(ResumeInst *RI);
+ bool SimplifyCleanupReturn(CleanupReturnInst *RI);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+
+public:
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
+ unsigned BonusInstThreshold, AssumptionCache *AC,
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG)
+ : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
+ LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {}
+
+ bool run(BasicBlock *BB);
+};
+
+} // end anonymous namespace
+
+/// Return true if it is safe to merge these two
+/// terminator instructions together.
+static bool
+SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2,
+ SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
+ if (SI1 == SI2)
+ return false; // Can't merge with self!
+
+ // It is not safe to merge these two switch instructions if they have a common
+ // successor, and if that successor has a PHI node, and if *that* PHI node has
+ // conflicting incoming values from the two switch blocks.
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ bool Fail = false;
+ for (BasicBlock *Succ : successors(SI2BB))
+ if (SI1Succs.count(Succ))
+ for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) !=
+ PN->getIncomingValueForBlock(SI2BB)) {
+ if (FailBlocks)
+ FailBlocks->insert(Succ);
+ Fail = true;
+ }
+ }
+
+ return !Fail;
+}
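+
+// For example, merging is rejected (and Succ recorded in FailBlocks) when a
+// common successor contains
+//   %p = phi i32 [ 0, %SI1BB ], [ 1, %SI2BB ]
+// since a single merged terminator could not supply both incoming values.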
+
+/// Return true if it is safe and profitable to merge these two terminator
+/// instructions together, where SI1 is an unconditional branch. PhiNodes will
+/// store all PHI nodes in common successors.
+static bool
+isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
+ Instruction *Cond,
+ SmallVectorImpl<PHINode *> &PhiNodes) {
+ if (SI1 == SI2)
+ return false; // Can't merge with self!
+ assert(SI1->isUnconditional() && SI2->isConditional());
+
+ // We fold the unconditional branch if we can easily update all PHI nodes in
+ // common successors:
+ // 1> We have a constant incoming value for the conditional branch;
+ // 2> We have "Cond" as the incoming value for the unconditional branch;
+ // 3> SI2->getCondition() and Cond have same operands.
+ CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
+ if (!Ci2)
+ return false;
+ if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
+ Cond->getOperand(1) == Ci2->getOperand(1)) &&
+ !(Cond->getOperand(0) == Ci2->getOperand(1) &&
+ Cond->getOperand(1) == Ci2->getOperand(0)))
+ return false;
+
+ BasicBlock *SI1BB = SI1->getParent();
+ BasicBlock *SI2BB = SI2->getParent();
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ for (BasicBlock *Succ : successors(SI2BB))
+ if (SI1Succs.count(Succ))
+ for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
+ !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
+ return false;
+ PhiNodes.push_back(PN);
+ }
+ return true;
+}
+
+/// Update PHI nodes in Succ to indicate that there will now be entries in it
+/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
+/// will be the same as those coming in from ExistPred, an existing predecessor
+/// of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+ BasicBlock *ExistPred) {
+ if (!isa<PHINode>(Succ->begin()))
+ return; // Quick exit if nothing to do
+
+ PHINode *PN;
+ for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+}
+
+/// Compute an abstract "cost" of speculating the given instruction,
+/// which is assumed to be safe to speculate. TCC_Free means cheap,
+/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// expensive.
+static unsigned ComputeSpeculationCost(const User *I,
+ const TargetTransformInfo &TTI) {
+ assert(isSafeToSpeculativelyExecute(I) &&
+ "Instruction is not safe to speculatively execute!");
+ return TTI.getUserCost(I);
+}
+
+/// If we have a merge point of an "if condition" as accepted above,
+/// return true if the specified value dominates the block. We
+/// don't handle the true generality of domination here, just a special case
+/// which works well enough for us.
+///
+/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping. If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands. If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+ SmallPtrSetImpl<Instruction *> *AggressiveInsts,
+ unsigned &CostRemaining,
+ const TargetTransformInfo &TTI,
+ unsigned Depth = 0) {
+ // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
+ // so limit the recursion depth.
+ // TODO: While this recursion limit does prevent pathological behavior, it
+ // would be better to track visited instructions to avoid cycles.
+ if (Depth == MaxSpeculationDepth)
+ return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ // Non-instructions all dominate instructions, but not all constantexprs
+ // can be executed unconditionally.
+ if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+ if (C->canTrap())
+ return false;
+ return true;
+ }
+ BasicBlock *PBB = I->getParent();
+
+ // We don't want to allow weird loops that might have the "if condition" in
+ // the bottom of this block.
+ if (PBB == BB)
+ return false;
+
+ // If this instruction is defined in a block that contains an unconditional
+ // branch to BB, then it must be in the 'conditional' part of the "if
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
+
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (!AggressiveInsts)
+ return false;
+
+ // If we have seen this instruction before, don't count it again.
+ if (AggressiveInsts->count(I))
+ return true;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ unsigned Cost = ComputeSpeculationCost(I, TTI);
+
+ // Allow exactly one instruction to be speculated regardless of its cost
+ // (as long as it is safe to do so).
+ // This is intended to flatten the CFG even if the instruction is a division
+ // or other expensive operation. The speculation of an expensive instruction
+ // is expected to be undone in CodeGenPrepare if the speculation has not
+ // enabled further IR optimizations.
+ if (Cost > CostRemaining &&
+ (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
+ return false;
+
+ // Avoid unsigned wrap.
+ CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
+
+ // Okay, we can only really hoist these out if their operands do
+ // not take us over the cost threshold.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+ Depth + 1))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
+ return true;
+}
+
+/// Extract ConstantInt from value, looking through IntToPtr
+/// and ConstantPointerNull. Return nullptr if value is not a constant int.
+static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
+ // Normal constant int.
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+ if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ return CI;
+
+ // This is some kind of pointer constant. Turn it into a pointer-sized
+ // ConstantInt if possible.
+ IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
+
+ // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
+ if (isa<ConstantPointerNull>(V))
+ return ConstantInt::get(PtrTy, 0);
+
+ // IntToPtr const int.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+ // The constant is very likely to have the right type already.
+ if (CI->getType() == PtrTy)
+ return CI;
+ else
+ return cast<ConstantInt>(
+ ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
+ }
+ return nullptr;
+}
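+
+// For example, for an i8* comparison with 64-bit pointers this maps:
+//   null                      --> i64 0
+//   inttoptr (i64 42 to i8*)  --> i64 42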
+
+namespace {
+
+/// Given a chain of or (||) or and (&&) comparison of a value against a
+/// constant, this will try to recover the information required for a switch
+/// structure.
+/// It will depth-first traverse the chain of comparisons, looking for patterns
+/// like %a == 12 or %a < 4, and combine them to produce a set of integers
+/// representing the different cases for the switch.
+/// Note that if the chain is composed of '||' it will build the set of elements
+/// that match the comparisons (i.e. any of these values makes the chain true),
+/// while for a chain of '&&' it will build the set of elements that make the
+/// test fail.
+struct ConstantComparesGatherer {
+ const DataLayout &DL;
+ Value *CompValue; /// Value found for the switch comparison
+ Value *Extra; /// Extra clause to be checked before the switch
+ SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
+ unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
+
+ /// Construct and compute the result for the comparison instruction Cond
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
+ : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ gather(Cond);
+ }
+
+ /// Prevent copy
+ ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
+ ConstantComparesGatherer &
+ operator=(const ConstantComparesGatherer &) = delete;
+
+private:
+ /// Try to set the current value used for the comparison; this succeeds only
+ /// if it wasn't set before or if the new value is the same as the old one.
+ bool setValueOnce(Value *NewVal) {
+ if (CompValue && CompValue != NewVal)
+ return false;
+ CompValue = NewVal;
+ return (CompValue != nullptr);
+ }
+
+ /// Try to match Instruction "I" as a comparison against a constant and
+ /// populate the vector Vals with the set of values that match (or do not
+ /// match, depending on isEQ).
+ /// Return false on failure. On success, the Value the comparison matched
+ /// against is placed in CompValue.
+ /// If CompValue is already set, the function is expected to fail if a match
+ /// is found but the value compared to is different.
+ bool matchInstruction(Instruction *I, bool isEQ) {
+ // If this is an icmp against a constant, handle this as one of the cases.
+ ICmpInst *ICI;
+ ConstantInt *C;
+ if (!((ICI = dyn_cast<ICmpInst>(I)) &&
+ (C = GetConstantInt(I->getOperand(1), DL)))) {
+ return false;
+ }
+
+ Value *RHSVal;
+ const APInt *RHSC;
+
+ // Pattern match a special case
+ // (x & ~2^z) == y --> x == y || x == y|2^z
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
+
+ // It's a little bit hard to see why the following transformations are
+ // correct. Here is a CVC3 program to verify them for 64-bit values:
+
+ /*
+ ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
+ x : BITVECTOR(64);
+ y : BITVECTOR(64);
+ z : BITVECTOR(64);
+ mask : BITVECTOR(64) = BVSHL(ONE, z);
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+
+ // Please note that each pattern must be a dual implication (<--> or
+ // iff). One directional implication can create spurious matches. If the
+ // implication is only one-way, an unsatisfiable condition on the left
+ // side can imply a satisfiable condition on the right side. Dual
+ // implication ensures that satisfiable conditions are transformed to
+ // other satisfiable conditions and unsatisfiable conditions are
+ // transformed to other unsatisfiable conditions.
+
+ // Here is a concrete example of an unsatisfiable condition on the left
+ // implying a satisfiable condition on the right:
+ //
+ // mask = (1 << z)
+ // (x & ~mask) == y --> (x == y || x == (y | mask))
+ //
+ // Substituting y = 3, z = 0 yields:
+ // (x & -2) == 3 --> (x == 3 || x == 2)
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y & ~mask = y) =>
+ ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
+ );
+ */
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = ~*RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(
+ ConstantInt::get(C->getContext(),
+ C->getValue() | Mask));
+ UsedICmps++;
+ return true;
+ }
+ }
+
+ // Pattern match a special case:
+ /*
+ QUERY( (y | mask = y) =>
+ ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
+ );
+ */
+ if (match(ICI->getOperand(0),
+ m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
+ APInt Mask = *RHSC;
+ if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(ConstantInt::get(C->getContext(),
+ C->getValue() & ~Mask));
+ UsedICmps++;
+ return true;
+ }
+ }
+
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(ICI->getOperand(0)))
+ return false;
+
+ UsedICmps++;
+ Vals.push_back(C);
+ return true;
+ }
+
+ // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
+ ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
+ ICI->getPredicate(), C->getValue());
+
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ Value *CandidateVal = I->getOperand(0);
+ if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
+ Span = Span.subtract(*RHSC);
+ CandidateVal = RHSVal;
+ }
+
+ // If this is an and/!= check, then we are looking to build the set of
+ // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
+ return false;
+ }
+
+ // If we already have a value for the switch, it has to match!
+ if (!setValueOnce(CandidateVal))
+ return false;
+
+ // Add all values from the range to the set
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
+
+ UsedICmps++;
+ return true;
+ }
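+
+ // A worked example of the range handling above: for
+ //   icmp ult (add i32 %x, -5), 3
+ // the allowed region for (%x - 5) is [0,3); subtracting the -5 offset
+ // shifts it to [5,8), so the case values {5,6,7} are recorded for %x.
+ // When isEQ is false the complement of the region is taken instead.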
+
+ /// Given a potentially 'or'd or 'and'd together collection of icmp
+ /// eq/ne/lt/gt instructions that compare a value against a constant, extract
+ /// the value being compared, and stick the constants into the Vals vector.
+ /// One "Extra" case is allowed to differ from the others.
+ void gather(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ bool isEQ = (I->getOpcode() == Instruction::Or);
+
+ // Keep a stack (SmallVector for efficiency) for depth-first traversal
+ SmallVector<Value *, 8> DFT;
+ SmallPtrSet<Value *, 8> Visited;
+
+ // Initialize
+ Visited.insert(V);
+ DFT.push_back(V);
+
+ while (!DFT.empty()) {
+ V = DFT.pop_back_val();
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If it is a || (or && depending on isEQ), process the operands.
+ if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
+ if (Visited.insert(I->getOperand(1)).second)
+ DFT.push_back(I->getOperand(1));
+ if (Visited.insert(I->getOperand(0)).second)
+ DFT.push_back(I->getOperand(0));
+ continue;
+ }
+
+ // Try to match the current instruction
+ if (matchInstruction(I, isEQ))
+ // Match succeeded; continue the loop.
+ continue;
+ }
+
+ // One element of the sequence of || (or &&) could not be matched as a
+ // comparison against the same value as the others.
+ // We allow only one "Extra" case to be checked before the switch.
+ if (!Extra) {
+ Extra = V;
+ continue;
+ }
+ // Failed to parse a proper sequence; abort now.
+ CompValue = nullptr;
+ break;
+ }
+ }
+};
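+
+// For example, gathering "%a == 2 || %a == 4 || %a == 8" yields
+// CompValue == %a and Vals == {2, 4, 8}; gathering "%a != 2 && %b != 0"
+// matches the %a compare and records the %b compare as the one allowed
+// Extra clause.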
+
+} // end anonymous namespace
+
+static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+ Instruction *Cond = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cond = dyn_cast<Instruction>(SI->getCondition());
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
+ }
+
+ TI->eraseFromParent();
+ if (Cond)
+ RecursivelyDeleteTriviallyDeadInstructions(Cond);
+}
+
+/// If the specified terminator checks to see if a value is equal to a
+/// constant integer value, return the value being compared; otherwise
+/// return null.
+Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+ Value *CV = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ // Do not permit merging of large switch instructions into their
+ // predecessors unless there is only one predecessor.
+ if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()),
+ pred_end(SI->getParent())) <=
+ 128)
+ CV = SI->getCondition();
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional() && BI->getCondition()->hasOneUse())
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
+ CV = ICI->getOperand(0);
+ }
+
+ // Unwrap any lossless ptrtoint cast.
+ if (CV) {
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
+ Value *Ptr = PTII->getPointerOperand();
+ if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
+ CV = Ptr;
+ }
+ }
+ return CV;
+}
+
+/// Given a value comparison instruction,
+/// decode all of the 'cases' that it represents and return the 'default' block.
+BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
+ TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Cases.reserve(SI->getNumCases());
+ for (auto Case : SI->cases())
+ Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+ return SI->getDefaultDest();
+ }
+
+ BranchInst *BI = cast<BranchInst>(TI);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
+ Cases.push_back(ValueEqualityComparisonCase(
+ GetConstantInt(ICI->getOperand(1), DL), Succ));
+ return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+/// Given a vector of bb/value pairs, remove any entries
+/// in the list that match the specified block.
+static void
+EliminateBlockCases(BasicBlock *BB,
+ std::vector<ValueEqualityComparisonCase> &Cases) {
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
+}
+
+/// Return true if there are any keys in C1 that exist in C2 as well.
+static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
+ std::vector<ValueEqualityComparisonCase> &C2) {
+ std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
+
+ // Make V1 be smaller than V2.
+ if (V1->size() > V2->size())
+ std::swap(V1, V2);
+
+ if (V1->empty())
+ return false;
+ if (V1->size() == 1) {
+ // Just scan V2.
+ ConstantInt *TheVal = (*V1)[0].Value;
+ for (unsigned i = 0, e = V2->size(); i != e; ++i)
+ if (TheVal == (*V2)[i].Value)
+ return true;
+ }
+
+ // Otherwise, just sort both lists and compare element by element.
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
+ unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+ while (i1 != e1 && i2 != e2) {
+ if ((*V1)[i1].Value == (*V2)[i2].Value)
+ return true;
+ if ((*V1)[i1].Value < (*V2)[i2].Value)
+ ++i1;
+ else
+ ++i2;
+ }
+ return false;
+}
+
+/// If TI is known to be a terminator instruction and its block is known to
+/// only have a single predecessor block, check to see if that predecessor is
+/// also a value comparison with the same value, and if that comparison
+/// determines the outcome of this comparison. If so, simplify TI. This does a
+/// very limited form of jump threading.
+bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
+ TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
+ Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+ if (!PredVal)
+ return false; // Not a value comparison in predecessor.
+
+ Value *ThisVal = isValueEqualityComparison(TI);
+ assert(ThisVal && "This isn't a value comparison!!");
+ if (ThisVal != PredVal)
+ return false; // Comparing different values.
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Find out information about when control will move from Pred to TI's block.
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDef =
+ GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
+ EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
+
+ // Find information about how control leaves this block.
+ std::vector<ValueEqualityComparisonCase> ThisCases;
+ BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+ EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
+
+ // If TI's block is the default block from Pred's comparison, potentially
+ // simplify TI based on this knowledge.
+ if (PredDef == TI->getParent()) {
+ // If we are here, we know that the value is none of those cases listed in
+ // PredCases. If there are any cases in ThisCases that are in PredCases, we
+ // can simplify TI.
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(ThisDef);
+ (void)NI;
+
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].Dest->removePredecessor(TI->getParent());
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+ }
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant *, 16> DeadCases;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ DeadCases.insert(PredCases[i].Value);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
+ --i;
+ if (DeadCases.count(i->getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
+ Weights.pop_back();
+ }
+ i->getCaseSuccessor()->removePredecessor(TI->getParent());
+ SI->removeCase(i);
+ }
+ }
+ if (HasWeight && Weights.size() >= 2)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext())
+ .createBranchWeights(Weights));
+
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = nullptr;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == TIBB) {
+ if (TIV)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].Value;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = nullptr;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].Value == TIV) {
+ TheRealDest = ThisCases[i].Dest;
+ break;
+ }
+
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (!TheRealDest)
+ TheRealDest = ThisDef;
+
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (BasicBlock *Succ : successors(TIBB))
+ if (Succ != CheckEdge)
+ Succ->removePredecessor(TIBB);
+ else
+ CheckEdge = nullptr;
+
+ // Insert the new branch.
+ Instruction *NI = Builder.CreateBr(TheRealDest);
+ (void)NI;
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI
+ << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
+}
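+
+// A schematic example of the second case above, where the incoming value is
+// known:
+//
+//   Pred:  switch i32 %x, label %D [ i32 1, label %TIBB ]
+//   TIBB:  %c = icmp eq i32 %x, 1   ; %x is known to be 1 here
+//          br i1 %c, label %T, label %F
+//
+// The conditional branch in TIBB is replaced by "br label %T" and the PHI
+// entries on the now-dead edge to %F are removed.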
+
+namespace {
+
+/// This class implements a stable ordering of constant
+/// integers that does not depend on their address. This is important for
+/// applications that sort ConstantInt's to ensure uniqueness.
+struct ConstantIntOrdering {
+ bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+ return LHS->getValue().ult(RHS->getValue());
+ }
+};
+
+} // end anonymous namespace
+
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+ ConstantInt *const *P2) {
+ const ConstantInt *LHS = *P1;
+ const ConstantInt *RHS = *P2;
+ if (LHS == RHS)
+ return 0;
+ return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
+}
+
+static inline bool HasBranchWeights(const Instruction *I) {
+ MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0))
+ if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ return MDS->getString().equals("branch_weights");
+
+ return false;
+}
+
+/// Get the weights of a given TerminatorInst; the default weight is at the
+/// front of the vector. If TI is a conditional eq, we need to swap the
+/// branch-weight metadata.
+static void GetBranchWeights(TerminatorInst *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
+ }
+}
+
+/// Scale the weights down by a common right shift until they all fit in
+/// uint32_t.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
+ if (Max > UINT_MAX) {
+ unsigned Offset = 32 - countLeadingZeros(Max);
+ for (uint64_t &I : Weights)
+ I >>= Offset;
+ }
+}
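+
+// For example, Weights == {1 << 40, 8}: Max has 23 leading zeros, so
+// Offset == 9 and the weights become {1 << 31, 0}, both of which fit in
+// uint32_t.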
+
+/// The specified terminator is a value equality comparison instruction
+/// (either a switch or a branch on "X == c").
+/// See if any of the predecessors of the terminator block are value comparisons
+/// on the same value. If so, and if safe to do so, fold them together.
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = TI->getParent();
+ Value *CV = isValueEqualityComparison(TI); // CondVal
+ assert(CV && "Not a comparison?");
+ bool Changed = false;
+
+ SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.pop_back_val();
+
+ // See if the predecessor is a comparison with the same value.
+ TerminatorInst *PTI = Pred->getTerminator();
+ Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
+
+ if (PCV == CV && TI != PTI) {
+ SmallSetVector<BasicBlock*, 4> FailBlocks;
+ if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
+ for (auto *Succ : FailBlocks) {
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
+ return false;
+ }
+ }
+
+ // Figure out which 'cases' to copy from TI to PTI.
+ std::vector<ValueEqualityComparisonCase> BBCases;
+ BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+ std::vector<ValueEqualityComparisonCase> PredCases;
+ BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+ // Based on whether the default edge from PTI goes to BB or not, fill in
+ // PredCases and PredDefault with the new switch cases we would like to
+ // build.
+ SmallVector<BasicBlock *, 8> NewSuccessors;
+
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistent here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
+ // If there are no predecessor weights but there are successor weights,
+ // populate Weights with 1, which will later be scaled to the sum of
+ // successor's weights
+ Weights.assign(1 + PredCases.size(), 1);
+
+ SmallVector<uint64_t, 8> SuccWeights;
+ if (SuccHasWeights) {
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistent here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
+
+ if (PredDefault == BB) {
+ // If this is the default destination from PTI, only the edges in TI
+ // that don't occur in PTI, or that branch to BB will be activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest != BB)
+ PTIHandled.insert(PredCases[i].Value);
+ else {
+ // The default destination is BB, we don't need explicit targets.
+ std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
+ }
+
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+
+ // Reconstruct the new switch statement we will be building.
+ if (PredDefault != BBDefault) {
+ PredDefault->removePredecessor(Pred);
+ PredDefault = BBDefault;
+ NewSuccessors.push_back(BBDefault);
+ }
+
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (!PTIHandled.count(BBCases[i].Value) &&
+ BBCases[i].Dest != BBDefault) {
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i + 1]);
+ ValidTotalSuccWeight += SuccWeights[i + 1];
+ }
+ }
+
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
+ } else {
+ // If this is not the default destination from PTI, only the edges
+ // in TI that occur in PTI with a destination of BB will be
+ // activated.
+ std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt *, uint64_t> WeightsForHandled;
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].Dest == BB) {
+ PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
+ std::swap(Weights[i + 1], Weights.back());
+ Weights.pop_back();
+ }
+
+ std::swap(PredCases[i], PredCases.back());
+ PredCases.pop_back();
+ --i;
+ --e;
+ }
+
+ // Okay, now we know which constants were sent to BB from the
+ // predecessor. Figure out where they will all go now.
+ for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+ if (PTIHandled.count(BBCases[i].Value)) {
+ // If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
+ PredCases.push_back(BBCases[i]);
+ NewSuccessors.push_back(BBCases[i].Dest);
+ PTIHandled.erase(
+ BBCases[i].Value); // This constant is taken care of
+ }
+
+ // If there are any constants vectored to BB that TI doesn't handle,
+ // they must go to the default destination of TI.
+ for (ConstantInt *I : PTIHandled) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[I]);
+ PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
+ NewSuccessors.push_back(BBDefault);
+ }
+ }
+
+ // Okay, at this point, we know which new successor Pred will get. Make
+ // sure we update the number of entries in the PHI nodes for these
+ // successors.
+ for (BasicBlock *NewSuccessor : NewSuccessors)
+ AddPredecessorToBlock(NewSuccessor, Pred, BB);
+
+ Builder.SetInsertPoint(PTI);
+ // Convert pointer to int before we switch.
+ if (CV->getType()->isPointerTy()) {
+ CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
+ "magicptr");
+ }
+
+ // Now that the successors are updated, create the new Switch instruction.
+ SwitchInst *NewSI =
+ Builder.CreateSwitch(CV, PredDefault, PredCases.size());
+ NewSI->setDebugLoc(PTI->getDebugLoc());
+ for (ValueEqualityComparisonCase &V : PredCases)
+ NewSI->addCase(V.Value, V.Dest);
+
+ if (PredHasWeights || SuccHasWeights) {
+ // Halve the weights if any of them cannot fit in a uint32_t.
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ NewSI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).createBranchWeights(MDWeights));
+ }
+
+ EraseTerminatorInstAndDCECond(PTI);
+
+ // Okay, last check. If BB is still a successor of the new switch, then we
+ // must have an infinite loop case. If so, add an infinitely looping block
+ // to handle the case to preserve the behavior of the code.
+ BasicBlock *InfLoopBlock = nullptr;
+ for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+ if (NewSI->getSuccessor(i) == BB) {
+ if (!InfLoopBlock) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or it won't matter if it's hot. :)
+ InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
+ BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ }
+ NewSI->setSuccessor(i, InfLoopBlock);
+ }
+
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+// If we would need to insert a select that uses the value of this invoke
+// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
+// can't hoist the invoke, as there is nowhere to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (BasicBlock *Succ : successors(BB1)) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = Succ->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
+
+/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
+/// in the two blocks up into the branch block. The caller of this function
+/// guarantees that BI's block dominates BB1 and BB2.
+static bool HoistThenElseCodeToIf(BranchInst *BI,
+ const TargetTransformInfo &TTI) {
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. In particular, we don't want to get into
+ // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
+ // such, we currently just scan for obviously identical instructions in an
+ // identical order.
+ BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
+ BasicBlock *BB2 = BI->getSuccessor(1); // The false destination.
+
+ BasicBlock::iterator BB1_Itr = BB1->begin();
+ BasicBlock::iterator BB2_Itr = BB2->begin();
+
+ Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*BB2_Itr++;
+ }
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ return false;
+
+ BasicBlock *BIParent = BI->getParent();
+
+ bool Changed = false;
+ do {
+ // If we are hoisting the terminator instruction, don't move one (making a
+ // broken BB); instead, clone it and remove BI.
+ if (isa<TerminatorInst>(I1))
+ goto HoistTerminator;
+
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return Changed;
+
+ // For a normal instruction, we just move one to right before the branch,
+ // then replace all uses of the other with the first. Finally, we remove
+ // the now redundant second instruction.
+ BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1);
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->andIRFlags(I2);
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_mem_parallel_loop_access};
+ combineMetadata(I1, I2, KnownIDs);
+
+ // I1 and I2 are being combined into a single instruction. Its debug
+ // location is the merge of the original instructions' debug locations.
+ if (!isa<CallInst>(I1))
+ I1->setDebugLoc(
+ DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()));
+
+ I2->eraseFromParent();
+ Changed = true;
+
+ I1 = &*BB1_Itr++;
+ I2 = &*BB2_Itr++;
+ // Skip debug info if it is not identical.
+ DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+ DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+ if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+ while (isa<DbgInfoIntrinsic>(I1))
+ I1 = &*BB1_Itr++;
+ while (isa<DbgInfoIntrinsic>(I2))
+ I2 = &*BB2_Itr++;
+ }
+ } while (I1->isIdenticalToWhenDefined(I2));
+
+ return true;
+
+HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return Changed;
+
+ for (BasicBlock *Succ : successors(BB1)) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = Succ->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ // Check for passingValueIsAlwaysUndefined here because we would rather
+ // eliminate undefined control flow than convert it to a select.
+ if (passingValueIsAlwaysUndefined(BB1V, PN) ||
+ passingValueIsAlwaysUndefined(BB2V, PN))
+ return Changed;
+
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+ return Changed;
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+ return Changed;
+ }
+ }
+
+ // Okay, it is safe to hoist the terminator.
+ Instruction *NT = I1->clone();
+ BIParent->getInstList().insert(BI->getIterator(), NT);
+ if (!NT->getType()->isVoidTy()) {
+ I1->replaceAllUsesWith(NT);
+ I2->replaceAllUsesWith(NT);
+ NT->takeName(I1);
+ }
+
+ IRBuilder<NoFolder> Builder(NT);
+  // Hoisting one of the terminators from our successor is a great thing.
+  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+  // them. If they do, the PHI entries for BB1/BB2 must agree for every PHI
+  // node, so where they differ we insert a select instruction to compute the
+  // final result.
+ std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
+ for (BasicBlock *Succ : successors(BB1)) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = Succ->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (!SI)
+ SI = cast<SelectInst>(
+ Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName() + "." + BB2V->getName(), BI));
+
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
+ }
+ }
+
+ // Update any PHI nodes in our new successors.
+ for (BasicBlock *Succ : successors(BB1))
+ AddPredecessorToBlock(Succ, BIParent, BB1);
+
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+}
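+
+// For illustration (an invented example; names are not from this file): given
+//
+// entry:
+//   br i1 %cmp, label %bb1, label %bb2
+// bb1:
+//   %x1 = add i32 %a, 1
+//   br label %end
+// bb2:
+//   %x2 = add i32 %a, 1
+//   br label %end
+// end:
+//   %phi = phi i32 [ %x1, %bb1 ], [ %x2, %bb2 ]
+//
+// hoisting moves the identical add above the branch and replaces all uses of
+// %x2 with %x1:
+//
+// entry:
+//   %x1 = add i32 %a, 1
+//   br i1 %cmp, label %bb1, label %bb2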
+
+// All instructions in Insts belong to different blocks that all unconditionally
+// branch to a common successor. Analyze each instruction and return true if it
+// would be possible to sink them into their successor, creating one common
+// instruction instead. For every value that would be required to be provided
+// by a PHI node (because an operand varies in each input block), add it to
+// PHIOperands.
+static bool canSinkInstructions(
+ ArrayRef<Instruction *> Insts,
+ DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
+ // Prune out obviously bad instructions to move. Any non-store instruction
+ // must have exactly one use, and we check later that use is by a single,
+ // common PHI instruction in the successor.
+ for (auto *I : Insts) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return false;
+
+ // Conservatively return false if I is an inline-asm instruction. Sinking
+ // and merging inline-asm instructions can potentially create arguments
+ // that cannot satisfy the inline-asm constraints.
+ if (const auto *C = dyn_cast<CallInst>(I))
+ if (C->isInlineAsm())
+ return false;
+
+ // Everything must have only one use too, apart from stores which
+ // have no uses.
+ if (!isa<StoreInst>(I) && !I->hasOneUse())
+ return false;
+ }
+
+ const Instruction *I0 = Insts.front();
+ for (auto *I : Insts)
+ if (!I->isSameOperationAs(I0))
+ return false;
+
+ // All instructions in Insts are known to be the same opcode. If they aren't
+ // stores, check the only user of each is a PHI or in the same block as the
+ // instruction, because if a user is in the same block as an instruction
+ // we're contemplating sinking, it must already be determined to be sinkable.
+ if (!isa<StoreInst>(I0)) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
+ if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return (PNUse &&
+ PNUse->getParent() == Succ &&
+ PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
+ U->getParent() == I->getParent();
+ }))
+ return false;
+ }
+
+ // Because SROA can't handle speculating stores of selects, try not
+ // to sink loads or stores of allocas when we'd have to create a PHI for
+ // the address operand. Also, because it is likely that loads or stores
+ // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // This can cause code churn which can have unintended consequences down
+ // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
+ // FIXME: This is a workaround for a deficiency in SROA - see
+ // https://llvm.org/bugs/show_bug.cgi?id=30188
+ if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1));
+ }))
+ return false;
+ if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(0));
+ }))
+ return false;
+
+ for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
+ if (I0->getOperand(OI)->getType()->isTokenTy())
+ // Don't touch any operand of token type.
+ return false;
+
+ auto SameAsI0 = [&I0, OI](const Instruction *I) {
+ assert(I->getNumOperands() == I0->getNumOperands());
+ return I->getOperand(OI) == I0->getOperand(OI);
+ };
+ if (!all_of(Insts, SameAsI0)) {
+ if (!canReplaceOperandWithVariable(I0, OI))
+ // We can't create a PHI from this GEP.
+ return false;
+ // Don't create indirect calls! The called value is the final operand.
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) {
+ // FIXME: if the call was *already* indirect, we should do this.
+ return false;
+ }
+ for (auto *I : Insts)
+ PHIOperands[I].push_back(I->getOperand(OI));
+ }
+ }
+ return true;
+}
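+
+// For illustration (invented names): for two predecessor blocks ending in
+//   %v1 = add i32 %a, 1     and     %v2 = add i32 %b, 1
+// whose sole users are a common PHI in the successor, canSinkInstructions
+// returns true and records %a and %b in PHIOperands, since operand 0 differs
+// between the blocks and would need a PHI after sinking.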
+
+// Assuming canSinkInstructions has returned true for the last instruction of
+// every block in Blocks, sink those instructions to their common successor,
+// commoning them into one instruction.
+static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
+ auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+
+  // canSinkInstructions returning true guarantees that every block has at
+  // least one non-terminator instruction.
+ SmallVector<Instruction*,4> Insts;
+ for (auto *BB : Blocks) {
+ Instruction *I = BB->getTerminator();
+ do {
+ I = I->getPrevNode();
+ } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
+ if (!isa<DbgInfoIntrinsic>(I))
+ Insts.push_back(I);
+ }
+
+  // The only checking we need to do now is that all users of all instructions
+  // are the same PHI node. canSinkInstructions should have checked this but it
+  // is slightly over-aggressive - it gets confused by commutative
+  // instructions, so double-check it here.
+ Instruction *I0 = Insts.front();
+ if (!isa<StoreInst>(I0)) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return U == PNUse;
+ }))
+ return false;
+ }
+
+  // We don't need to do any more checking here; canSinkInstructions should
+  // have done it all for us.
+ SmallVector<Value*, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+    // This check is different from the one in canSinkInstructions. There, we
+ // cared about the global view once simplifycfg (and instcombine) have
+ // completed - it takes into account PHIs that become trivially
+ // simplifiable. However here we need a more local view; if an operand
+ // differs we create a PHI and rely on instcombine to clean up the very
+ // small mess we may make.
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
+ }
+
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // The debug location for the "common" instruction is the merged locations of
+ // all the commoned instructions. We start with the original location of the
+ // "common" instruction and iteratively merge each location in the loop below.
+ const DILocation *Loc = I0->getDebugLoc();
+
+ // Update metadata and IR flags, and merge debug locations.
+ for (auto *I : Insts)
+ if (I != I0) {
+ Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
+ combineMetadataForCSE(I0, I);
+ I0->andIRFlags(I);
+ }
+ if (!isa<CallInst>(I0))
+ I0->setDebugLoc(Loc);
+
+ if (!isa<StoreInst>(I0)) {
+    // canSinkInstructions checked that all instructions were used by
+ // one and only one PHI node. Find that now, RAUW it to our common
+ // instruction and nuke it.
+ assert(I0->hasOneUse());
+ auto *PN = cast<PHINode>(*I0->user_begin());
+ PN->replaceAllUsesWith(I0);
+ PN->eraseFromParent();
+ }
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+
+ return true;
+}
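+
+// Continuing the sketch above (invented names): sinkLastInstruction would
+// create
+//   %a.sink = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+// at the top of the successor, rewrite %v1 to "add i32 %a.sink, 1", move it
+// below the PHIs, RAUW the PHI that consumed %v1/%v2, and erase %v2.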
+
+namespace {
+
+  // LockstepReverseIterator - Iterates through instructions
+  // in a set of blocks in reverse order from the first non-terminator.
+  // For example (assume all blocks have size n, counted 1-based with the
+  // terminator as instruction n):
+  //   LockstepReverseIterator I([B1, B2, B3]);
+  //   *I = [B1[n-1], B2[n-1], B3[n-1]]; --I;
+  //   *I = [B1[n-2], B2[n-2], B3[n-2]]; --I;
+  //   *I = [B1[n-3], B2[n-3], B3[n-3]]; --I;
+  //   ...
+ class LockstepReverseIterator {
+ ArrayRef<BasicBlock*> Blocks;
+ SmallVector<Instruction*,4> Insts;
+ bool Fail;
+ public:
+ LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) :
+ Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ Insts.clear();
+ for (auto *BB : Blocks) {
+ Instruction *Inst = BB->getTerminator();
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ if (!Inst) {
+ // Block wasn't big enough.
+ Fail = true;
+ return;
+ }
+ Insts.push_back(Inst);
+ }
+ }
+
+ bool isValid() const {
+ return !Fail;
+ }
+
+ void operator -- () {
+ if (Fail)
+ return;
+ for (auto *&Inst : Insts) {
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ // Already at beginning of block.
+ if (!Inst) {
+ Fail = true;
+ return;
+ }
+ }
+ }
+
+ ArrayRef<Instruction*> operator * () const {
+ return Insts;
+ }
+ };
+
+} // end anonymous namespace
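+
+// A sketch of the intended use (mirroring SinkThenElseCodeToEnd below):
+//
+//   LockstepReverseIterator LRI(Blocks);
+//   while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) {
+//     // ... record *LRI as sinkable ...
+//     --LRI;
+//   }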
+
+/// Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // We support two situations:
+ // (1) all incoming arcs are unconditional
+ // (2) one incoming arc is conditional
+ //
+ // (2) is very common in switch defaults and
+ // else-if patterns;
+ //
+ // if (a) f(1);
+ // else if (b) f(2);
+ //
+ // produces:
+ //
+ // [if]
+ // / \
+ // [f(1)] [if]
+ // | | \
+ // | | |
+ // | [f(2)]|
+ // \ | /
+ // [ end ]
+ //
+ // [end] has two unconditional predecessor arcs and one conditional. The
+ // conditional refers to the implicit empty 'else' arc. This conditional
+ // arc can also be caused by an empty default block in a switch.
+ //
+ // In this case, we attempt to sink code from all *unconditional* arcs.
+ // If we can sink instructions from these arcs (determined during the scan
+ // phase below) we insert a common successor for all unconditional arcs and
+ // connect that to [end], to enable sinking:
+ //
+ // [if]
+ // / \
+ // [x(1)] [if]
+ // | | \
+ // | | \
+ // | [x(2)] |
+ // \ / |
+ // [sink.split] |
+ // \ /
+ // [ end ]
+ //
+ SmallVector<BasicBlock*,4> UnconditionalPreds;
+ Instruction *Cond = nullptr;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
+ UnconditionalPreds.push_back(B);
+ else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
+ Cond = T;
+ else
+ return false;
+ }
+ if (UnconditionalPreds.size() < 2)
+ return false;
+
+ bool Changed = false;
+ // We take a two-step approach to tail sinking. First we scan from the end of
+ // each block upwards in lockstep. If the n'th instruction from the end of each
+ // block can be sunk, those instructions are added to ValuesToSink and we
+ // carry on. If we can sink an instruction but need to PHI-merge some operands
+ // (because they're not identical in each instruction) we add these to
+ // PHIOperands.
+ unsigned ScanIdx = 0;
+ SmallPtrSet<Value*,4> InstructionsToSink;
+ DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
+ LockstepReverseIterator LRI(UnconditionalPreds);
+ while (LRI.isValid() &&
+ canSinkInstructions(*LRI, PHIOperands)) {
+ DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
+ InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
+ ++ScanIdx;
+ --LRI;
+ }
+
+ auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
+ unsigned NumPHIdValues = 0;
+ for (auto *I : *LRI)
+ for (auto *V : PHIOperands[I])
+ if (InstructionsToSink.count(V) == 0)
+ ++NumPHIdValues;
+ DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+ if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+ NumPHIInsts++;
+
+ return NumPHIInsts <= 1;
+ };
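+  // For example, with 3 unconditional predecessors, an operand that differs
+  // in all 3 blocks (and is not itself being sunk) yields NumPHIdValues = 3
+  // and NumPHIInsts = 1, which is accepted; two such differing operands yield
+  // NumPHIInsts = 2 and the instruction is rejected as unprofitable to sink.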
+
+ if (ScanIdx > 0 && Cond) {
+ // Check if we would actually sink anything first! This mutates the CFG and
+ // adds an extra block. The goal in doing this is to allow instructions that
+ // couldn't be sunk before to be sunk - obviously, speculatable instructions
+ // (such as trunc, add) can be sunk and predicated already. So we check that
+ // we're going to sink at least one non-speculatable instruction.
+ LRI.reset();
+ unsigned Idx = 0;
+ bool Profitable = false;
+ while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
+ if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
+ Profitable = true;
+ break;
+ }
+ --LRI;
+ ++Idx;
+ }
+ if (!Profitable)
+ return false;
+
+ DEBUG(dbgs() << "SINK: Splitting edge\n");
+ // We have a conditional edge and we're going to sink some instructions.
+ // Insert a new block postdominating all blocks we're going to sink from.
+ if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
+ ".sink.split"))
+ // Edges couldn't be split.
+ return false;
+ Changed = true;
+ }
+
+ // Now that we've analyzed all potential sinking candidates, perform the
+ // actual sink. We iteratively sink the last non-terminator of the source
+ // blocks into their common successor unless doing so would require too
+ // many PHI instructions to be generated (currently only one PHI is allowed
+ // per sunk instruction).
+ //
+  // We can use InstructionsToSink to discount values needing PHI-merging that
+  // will actually be sunk in a later iteration. This allows us to be more
+  // aggressive in what we sink. This does allow a false positive where we
+  // sink presuming a later value will also be sunk, but then stop halfway
+  // through and never actually sink it, which means we produce more PHIs than
+  // intended. This is unlikely in practice though.
+ for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
+ DEBUG(dbgs() << "SINK: Sink: "
+ << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
+ << "\n");
+
+ // Because we've sunk every instruction in turn, the current instruction to
+ // sink is always at index 0.
+ LRI.reset();
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Too many PHIs would be created.
+ DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ break;
+ }
+
+ if (!sinkLastInstruction(UnconditionalPreds))
+ return Changed;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, i32 %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. nullptr otherwise.
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return nullptr;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return nullptr;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 9;
+ for (Instruction &CurI : reverse(*BrBB)) {
+ if (!MaxNumInstToLookAt)
+ break;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(CurI))
+ continue;
+ --MaxNumInstToLookAt;
+
+ // Could be calling an instruction that affects memory like free().
+ if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return nullptr;
+
+ if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
+      // Found the previous store; make sure it stores to the same location.
+ if (SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ return nullptr; // Unknown store.
+ }
+ }
+
+ return nullptr;
+}
+
+/// \brief Speculate a conditional basic block flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should handle them directly. An
+/// example of this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations, such as those in
+/// instcombine and the DAG combiner, more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
+/// br label EndBB
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, i32 0, i32 %sub
+/// ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const TargetTransformInfo &TTI) {
+  // Be conservative for now. FP select instructions can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+  assert(EndBB == BI->getSuccessor(!Invert) &&
+         "No edge from 'if' block to end block");
+
+  // Keep a count of how many times instructions are used within ThenBB when
+  // they are candidates for sinking into ThenBB. Specifically:
+  //  - They are defined in BB, and
+  //  - They have no side effects, and
+  //  - All of their uses are in ThenBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = nullptr;
+ StoreInst *SpeculatedStore = nullptr;
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = std::prev(ThenBB->end());
+ BBI != BBE; ++BBI) {
+ Instruction *I = &*BBI;
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+
+ // Don't hoist the instruction if it's unsafe or expensive.
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
+ I, BB, ThenBB, EndBB))))
+ return false;
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I, TTI) >
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
+ return false;
+
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
+ }
+ }
+
+  // Consider any sink candidates which are only used in ThenBB as costs for
+  // speculation. Note, while we iterate over a DenseMap here, we are summing
+  // and so iteration order isn't significant.
+  for (const auto &SinkCandidate : SinkCandidateUseCounts)
+    if (SinkCandidate.first->getNumUses() == SinkCandidate.second) {
+      ++SpeculationCost;
+      if (SpeculationCost > 1)
+        return false;
+    }
+
+ // Check that the PHI nodes can be converted to selects.
+ bool HaveRewritablePHIs = false;
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *OrigV = PN->getIncomingValueForBlock(BB);
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
+ // Skip PHIs which are trivial.
+ if (ThenV == OrigV)
+ continue;
+
+ // Don't convert to selects if we could remove undefined behavior instead.
+ if (passingValueIsAlwaysUndefined(OrigV, PN) ||
+ passingValueIsAlwaysUndefined(ThenV, PN))
+ return false;
+
+ HaveRewritablePHIs = true;
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
+ continue; // Known safe and cheap.
+
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
+ return false;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
+ unsigned MaxCost =
+ 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
+
+ // If there are no PHIs to process, bail early. This helps ensure idempotence
+ // as well.
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
+ return false;
+
+ // If we get here, we can hoist the instruction and if-convert.
+  DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n");
+
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(
+ BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
+ SpeculatedStore->setOperand(0, S);
+ SpeculatedStore->setDebugLoc(
+ DILocation::getMergedLocation(
+ BI->getDebugLoc(), SpeculatedStore->getDebugLoc()));
+ }
+
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+ for (auto &I : *ThenBB)
+ I.dropUnknownNonDebugMetadata();
+
+ // Hoist the instructions.
+ BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
+ ThenBB->begin(), std::prev(ThenBB->end()));
+
+ // Insert selects and rewrite the PHI operands.
+ IRBuilder<NoFolder> Builder(BI);
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned OrigI = PN->getBasicBlockIndex(BB);
+ unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN->getIncomingValue(OrigI);
+ Value *ThenV = PN->getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the preexisting value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(
+ BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
+ PN->setIncomingValue(OrigI, V);
+ PN->setIncomingValue(ThenI, V);
+ }
+
+ ++NumSpeculations;
+ return true;
+}
+
+/// Return true if we can thread a branch across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned Size = 0;
+
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (Size > 10)
+ return false; // Don't clone large BB's.
+ ++Size;
+
+ // We can only support instructions that do not define values that are
+ // live outside of the current basic block.
+ for (User *U : BBI->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
+ return false;
+ }
+
+ // Looks ok, continue checking.
+ }
+
+ return true;
+}
+
+/// If we have a conditional branch on a PHI node value that is defined in the
+/// same block as the branch and if any PHI entries are constants, thread edges
+/// corresponding to that entry to be branches to their ultimate destination.
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
+ AssumptionCache *AC) {
+ BasicBlock *BB = BI->getParent();
+ PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+ // NOTE: we currently cannot transform this case if the PHI node is used
+ // outside of the block.
+ if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+ return false;
+
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
+
+ // Now we know that this block has multiple preds and two succs.
+ if (!BlockIsSimpleEnoughToThreadThrough(BB))
+ return false;
+
+ // Can't fold blocks that contain noduplicate or convergent calls.
+ if (any_of(*BB, [](const Instruction &I) {
+ const CallInst *CI = dyn_cast<CallInst>(&I);
+ return CI && (CI->cannotDuplicate() || CI->isConvergent());
+ }))
+ return false;
+
+ // Okay, this is a simple enough basic block. See if any phi values are
+ // constants.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (!CB || !CB->getType()->isIntegerTy(1))
+ continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB)
+ continue; // Skip self loops.
+ // Skip if the predecessor's terminator is an indirect branch.
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ continue;
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB =
+ BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value *, Value *> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName())
+ N->setName(BBI->getName() + ".c");
+
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) {
+ DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
+
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
+ if (!BBI->use_empty())
+ TranslateMap[&*BBI] = V;
+ if (!N->mayHaveSideEffects()) {
+ N->deleteValue(); // Instruction folded away, don't need actual inst
+ N = nullptr;
+ }
+ } else {
+ if (!BBI->use_empty())
+ TranslateMap[&*BBI] = N;
+ }
+ // Insert the new instruction into its new home.
+ if (N)
+ EdgeBB->getInstList().insert(InsertPt, N);
+
+ // Register the new instruction with the assumption cache if necessary.
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
+ }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, DL, AC) | true;
+ }
+
+ return false;
+}
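+
+// For illustration (an invented example): given
+//
+// bb:
+//   %p = phi i1 [ true, %pred1 ], [ %c, %pred2 ]
+//   br i1 %p, label %t, label %f
+//
+// the edge from %pred1 always takes the true side, so it is revectored
+// through a new block "%t.critedge" that branches straight to %t, with any
+// intervening instructions from %bb cloned into it (suffixed ".c").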
+
+/// Given a BB that starts with the specified two-entry PHI node,
+/// see if we can eliminate it.
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ // Ok, this is a two entry PHI node. Check to see if this is a simple "if
+ // statement", which has a very simple dominance structure. Basically, we
+ // are trying to find the condition that is being branched on, which
+ // subsequently causes this merge to happen. We really want control
+ // dependence information for this check, but simplifycfg can't keep it up
+ // to date, and this catches most of the cases we care about anyway.
+ BasicBlock *BB = PN->getParent();
+ BasicBlock *IfTrue, *IfFalse;
+ Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
+
+ // Okay, we found that we can merge this two-entry phi node into a select.
+ // Doing so would require us to fold *all* two entry phi nodes in this block.
+ // At some point this becomes non-profitable (particularly if the target
+ // doesn't support cmov's). Only do this transformation if there are two or
+ // fewer PHI nodes in this block.
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+ if (NumPhis > 2)
+ return false;
+
+ // Loop over the PHI's seeing if we can promote them all to select
+ // instructions. While we are at it, keep track of the instructions
+ // that need to be moved to the dominating block.
+ SmallPtrSet<Instruction *, 4> AggressiveInsts;
+ unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+ MaxCostVal1 = PHINodeFoldingThreshold;
+ MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
+ MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ MaxCostVal0, TTI) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ MaxCostVal1, TTI))
+ return false;
+ }
+
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
+ // If all PHI nodes are promotable, check to make sure that all instructions
+ // in the predecessor blocks can be promoted as well. If not, we won't be able
+ // to get rid of the control flow, so it's not worth promoting to select
+ // instructions.
+ BasicBlock *DomBlock = nullptr;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = nullptr;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I);
+ ++I)
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control flow, so
+ // the xform is not worth it.
+ return false;
+ }
+ }
+
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = nullptr;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I);
+ ++I)
+ if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
+ // This is not an aggressive instruction that we can promote.
+ // Because of this, we won't be able to get rid of the control flow, so
+ // the xform is not worth it.
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
+
+ // If we can still promote the PHI nodes after this gauntlet of tests,
+ // do all of the PHI's now.
+ Instruction *InsertPt = DomBlock->getTerminator();
+ IRBuilder<NoFolder> Builder(InsertPt);
+
+ // Move all 'aggressive' instructions, which are defined in the
+ // conditional parts of the if's up to the dominating block.
+ if (IfBlock1) {
+ for (auto &I : *IfBlock1)
+ I.dropUnknownNonDebugMetadata();
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
+ IfBlock1->getInstList(), IfBlock1->begin(),
+ IfBlock1->getTerminator()->getIterator());
+ }
+ if (IfBlock2) {
+ for (auto &I : *IfBlock2)
+ I.dropUnknownNonDebugMetadata();
+ DomBlock->getInstList().splice(InsertPt->getIterator(),
+ IfBlock2->getInstList(), IfBlock2->begin(),
+ IfBlock2->getTerminator()->getIterator());
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ // Change the PHI node into a select instruction.
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+ Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
+ PN->replaceAllUsesWith(Sel);
+ Sel->takeName(PN);
+ PN->eraseFromParent();
+ }
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+ Builder.CreateBr(BB);
+ OldTI->eraseFromParent();
+ return true;
+}
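+
+// For illustration (an invented example): FoldTwoEntryPHINode turns a diamond
+//
+//   br i1 %c, label %t, label %f
+// t:                              ; computes %tv, cheap and side-effect free
+//   br label %merge
+// f:                              ; computes %fv, cheap and side-effect free
+//   br label %merge
+// merge:
+//   %r = phi i32 [ %tv, %t ], [ %fv, %f ]
+//
+// into straight-line code in the dominating block:
+//
+//   %tv = ...
+//   %fv = ...
+//   %r = select i1 %c, i32 %tv, i32 %fv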
+
+/// If we found a conditional branch that goes to two returning blocks,
+/// try to merge them together into one return,
+/// introducing a select if the return values disagree.
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ assert(BI->isConditional() && "Must be a conditional branch");
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
+ ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
+
+  // Check to ensure both blocks are empty (just a return) or contain only PHI
+  // nodes followed by the return. If there are other instructions, merging
+  // would cause extra computation on one path or the other.
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ Builder.SetInsertPoint(BI);
+ // Okay, we found a branch that is going to two return nodes. If
+ // there is no return value for this function, just change the
+ // branch into a return.
+ if (FalseRet->getNumOperands() == 0) {
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+ Builder.CreateRetVoid();
+ EraseTerminatorInstAndDCECond(BI);
+ return true;
+ }
+
+ // Otherwise, figure out what the true and false return values are
+ // so we can insert a new select instruction.
+ Value *TrueValue = TrueRet->getReturnValue();
+ Value *FalseValue = FalseRet->getReturnValue();
+
+ // Unwrap any PHI nodes in the return blocks.
+ if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
+ if (TVPN->getParent() == TrueSucc)
+ TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+ if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
+ if (FVPN->getParent() == FalseSucc)
+ FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+ // In order for this transformation to be safe, we must be able to
+ // unconditionally execute both operands to the return. This is
+ // normally the case, but we could have a potentially-trapping
+ // constant expression that prevents this transformation from being
+ // safe.
+ if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
+ if (TCV->canTrap())
+ return false;
+ if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
+ if (FCV->canTrap())
+ return false;
+
+  // Okay, we collected all the mapped values and checked them for sanity, and
+  // decided to really do this transformation. First, update the CFG.
+ TrueSucc->removePredecessor(BI->getParent());
+ FalseSucc->removePredecessor(BI->getParent());
+
+ // Insert select instructions where needed.
+ Value *BrCond = BI->getCondition();
+ if (TrueValue) {
+ // Insert a select if the results differ.
+ if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
+ } else if (isa<UndefValue>(TrueValue)) {
+ TrueValue = FalseValue;
+ } else {
+ TrueValue =
+ Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
+ }
+ }
+
+ Value *RI =
+ !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
+
+ (void)RI;
+
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ << "\n " << *BI << "NewRet = " << *RI
+ << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
+
+ EraseTerminatorInstAndDCECond(BI);
+
+ return true;
+}
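+
+// For illustration (an invented example): SimplifyCondBranchToTwoReturns turns
+//
+//   br i1 %c, label %t, label %f
+// t:
+//   ret i32 %a
+// f:
+//   ret i32 %b
+//
+// into
+//
+//   %retval = select i1 %c, i32 %a, i32 %b
+//   ret i32 %retval
+//
+// (no select is needed when %a == %b or when one side is undef).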
+
+/// Return true if the given instruction is available
+/// in its predecessor block. If yes, the instruction will be removed.
+static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
+ if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
+ return false;
+ for (Instruction &I : *PB) {
+ Instruction *PBI = &I;
+ // Check whether Inst and PBI generate the same value.
+ if (Inst->isIdenticalTo(PBI)) {
+ Inst->replaceAllUsesWith(PBI);
+ Inst->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Return true if either PBI or BI has branch weight available, and store
+/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
+/// not have branch weight, use 1:1 as its weight.
+static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
+ uint64_t &PredTrueWeight,
+ uint64_t &PredFalseWeight,
+ uint64_t &SuccTrueWeight,
+ uint64_t &SuccFalseWeight) {
+ bool PredHasWeights =
+ PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
+ bool SuccHasWeights =
+ BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
+ if (PredHasWeights || SuccHasWeights) {
+ if (!PredHasWeights)
+ PredTrueWeight = PredFalseWeight = 1;
+ if (!SuccHasWeights)
+ SuccTrueWeight = SuccFalseWeight = 1;
+ return true;
+ } else {
+ return false;
+ }
+}
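+
+// For example (an illustrative sketch): if PBI carries !prof weights 7:1 and
+// BI carries none, this returns true with {PredTrue, PredFalse} = {7, 1} and
+// {SuccTrue, SuccFalse} = {1, 1}; if neither branch has weights, it returns
+// false and leaves the outputs unset.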
+
+/// If this basic block is simple enough, and if a predecessor branches to us
+/// and one of our successors, fold the block into the predecessor and use
+/// logical operations to pick the right destination.
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
+ BasicBlock *BB = BI->getParent();
+
+ Instruction *Cond = nullptr;
+ if (BI->isConditional())
+ Cond = dyn_cast<Instruction>(BI->getCondition());
+ else {
+    // For an unconditional branch, check for a simple CFG pattern, where
+    // BB has a single predecessor and BB's successor is also its predecessor's
+    // successor. If such a pattern exists, check for CSE between BB and its
+    // predecessor.
+ if (BasicBlock *PB = BB->getSinglePredecessor())
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
+ if (PBI->isConditional() &&
+ (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
+ BI->getSuccessor(0) == PBI->getSuccessor(1))) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Curr = &*I++;
+ if (isa<CmpInst>(Curr)) {
+ Cond = Curr;
+ break;
+ }
+ // Quit if we can't remove this instruction.
+ if (!checkCSEInPredecessor(Curr, PB))
+ return false;
+ }
+ }
+
+ if (!Cond)
+ return false;
+ }
+
+ if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
+
+ // Make sure the instruction after the condition is the cond branch.
+ BasicBlock::iterator CondIt = ++Cond->getIterator();
+
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(CondIt))
+ ++CondIt;
+
+ if (&*CondIt != BI)
+ return false;
+
+ // Only allow this transformation if computing the condition doesn't involve
+ // too many instructions and these involved instructions can be executed
+ // unconditionally. We denote all involved instructions except the condition
+ // as "bonus instructions", and only allow this transformation when the
+ // number of the bonus instructions does not exceed a certain threshold.
+ unsigned NumBonusInsts = 0;
+ for (auto I = BB->begin(); Cond != &*I; ++I) {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
+ return false;
+ // I has only one use and can be executed unconditionally.
+ Instruction *User = dyn_cast<Instruction>(I->user_back());
+ if (User == nullptr || User->getParent() != BB)
+ return false;
+ // I is used in the same BB. Since BI uses Cond and doesn't have more slots
+ // to use any other instruction, User must be an instruction between next(I)
+ // and Cond.
+ ++NumBonusInsts;
+    // Exit early once we reach the limit.
+ if (NumBonusInsts > BonusInstThreshold)
+ return false;
+ }
+
+ // Cond is known to be a compare or binary operator. Check to make sure that
+ // neither operand is a potentially-trapping constant expression.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
+ if (CE->canTrap())
+ return false;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
+ if (CE->canTrap())
+ return false;
+
+ // Finally, don't infinitely unroll conditional loops.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
+ if (TrueDest == BB || FalseDest == BB)
+ return false;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *PredBlock = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
+
+ // Check that we have two conditional branches. If there is a PHI node in
+ // the common successor, verify that the same value flows in from both
+ // blocks.
+ SmallVector<PHINode *, 4> PHIs;
+ if (!PBI || PBI->isUnconditional() ||
+ (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
+ (!BI->isConditional() &&
+ !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
+ continue;
+
+ // Determine if the two branches share a common destination.
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
+ bool InvertPredCond = false;
+
+ if (BI->isConditional()) {
+ if (PBI->getSuccessor(0) == TrueDest) {
+ Opc = Instruction::Or;
+ } else if (PBI->getSuccessor(1) == FalseDest) {
+ Opc = Instruction::And;
+ } else if (PBI->getSuccessor(0) == FalseDest) {
+ Opc = Instruction::And;
+ InvertPredCond = true;
+ } else if (PBI->getSuccessor(1) == TrueDest) {
+ Opc = Instruction::Or;
+ InvertPredCond = true;
+ } else {
+ continue;
+ }
+ } else {
+ if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
+ continue;
+ }
+
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ IRBuilder<> Builder(PBI);
+
+ // If we need to invert the condition in the pred block to match, do so now.
+ if (InvertPredCond) {
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond =
+ Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
+ }
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+ }
+
+ // If we have bonus instructions, clone them into the predecessor block.
+ // Note that there may be multiple predecessor blocks, so we cannot move
+ // bonus instructions to a predecessor block.
+ ValueToValueMapTy VMap; // maps original values to cloned values
+ // We already make sure Cond is the last instruction before BI. Therefore,
+ // all instructions before Cond other than DbgInfoIntrinsic are bonus
+ // instructions.
+ for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
+ if (isa<DbgInfoIntrinsic>(BonusInst))
+ continue;
+ Instruction *NewBonusInst = BonusInst->clone();
+ RemapInstruction(NewBonusInst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ VMap[&*BonusInst] = NewBonusInst;
+
+      // If we moved a load, we can no longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand.
+ NewBonusInst->dropUnknownNonDebugMetadata();
+
+ PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
+ NewBonusInst->takeName(&*BonusInst);
+ BonusInst->setName(BonusInst->getName() + ".old");
+ }
+
+ // Clone Cond into the predecessor basic block, and or/and the
+ // two conditions together.
+ Instruction *New = Cond->clone();
+ RemapInstruction(New, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ PredBlock->getInstList().insert(PBI->getIterator(), New);
+ New->takeName(Cond);
+ Cond->setName(New->getName() + ".old");
+
+ if (BI->isConditional()) {
+ Instruction *NewCond = cast<Instruction>(
+ Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond"));
+ PBI->setCondition(NewCond);
+
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool HasWeights =
+ extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
+ if (PBI->getSuccessor(0) == BB) {
+ if (HasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
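+          // For example (an illustrative sketch): with PBI weights 3:1 and BI
+          // weights 5:3, the new weights are 3*5 = 15 (true) and
+          // 1*(5+3) + 3*3 = 17 (false).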
+ NewWeights.push_back(PredFalseWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ PBI->setSuccessor(0, TrueDest);
+ }
+ if (PBI->getSuccessor(1) == BB) {
+ if (HasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight *
+ (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
+ AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ PBI->setSuccessor(1, FalseDest);
+ }
+ if (NewWeights.size() == 2) {
+        // Halve the weights if any of them cannot fit in a uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
+ NewWeights.end());
+ PBI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
+ } else {
+ // Update PHI nodes in the common successors.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ ConstantInt *PBI_C = cast<ConstantInt>(
+ PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
+ assert(PBI_C->getType()->isIntegerTy(1));
+ Instruction *MergedCond = nullptr;
+ if (PBI->getSuccessor(0) == TrueDest) {
+ // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
+ // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
+ // is false: !PBI_Cond and BI_Value
+ Instruction *NotCond = cast<Instruction>(
+ Builder.CreateNot(PBI->getCondition(), "not.cond"));
+ MergedCond = cast<Instruction>(
+ Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond"));
+ if (PBI_C->isOne())
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
+ } else {
+ // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
+ // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
+ // is false: PBI_Cond and BI_Value
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::And, PBI->getCondition(), New, "and.cond"));
+ if (PBI_C->isOne()) {
+ Instruction *NotCond = cast<Instruction>(
+ Builder.CreateNot(PBI->getCondition(), "not.cond"));
+ MergedCond = cast<Instruction>(Builder.CreateBinOp(
+ Instruction::Or, NotCond, MergedCond, "or.cond"));
+ }
+ }
+ // Update PHI Node.
+ PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
+ MergedCond);
+ }
+ // Change PBI from Conditional to Unconditional.
+ BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
+ EraseTerminatorInstAndDCECond(PBI);
+ PBI = New_PBI;
+ }
+
+ // If BI was a loop latch, it may have had associated loop metadata.
+ // We need to copy it to the new latch, that is, PBI.
+ if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
+ PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
+
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+
+ // Copy any debug value intrinsics into the end of PredBlock.
+ for (Instruction &I : *BB)
+ if (isa<DbgInfoIntrinsic>(I))
+ I.clone()->insertBefore(PBI);
+
+ return true;
+ }
+ return false;
+}
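+
+// For illustration (labels are invented): with
+//
+// pred:
+//   br i1 %x, label %td, label %bb
+// bb:
+//   br i1 %y, label %td, label %fd
+//
+// the two branches share %td as a common destination, so the fold clones %y's
+// computation into %pred and produces
+//
+// pred:
+//   %or.cond = or i1 %x, %y
+//   br i1 %or.cond, label %td, label %fd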
+
+// If there is exactly one store instruction in total across BB1 and BB2,
+// return it; otherwise return nullptr.
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+ StoreInst *S = nullptr;
+ for (auto *BB : {BB1, BB2}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (S)
+ // Multiple stores seen.
+ return nullptr;
+ else
+ S = SI;
+ }
+ }
+ return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+ Value *AlternativeV = nullptr) {
+ // PHI is going to be a PHI node that allows the value V that is defined in
+ // BB to be referenced in BB's only successor.
+ //
+ // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+ // doesn't matter to us what the other operand is (it'll never get used). We
+ // could just create a new PHI with an undef incoming value, but that could
+ // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+ // other PHI. So here we directly look for some PHI in BB's successor with V
+ // as an incoming operand. If we find one, we use it, else we create a new
+ // one.
+ //
+ // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+// PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+ // where OtherBB is the single other predecessor of BB's only successor.
+ PHINode *PHI = nullptr;
+ BasicBlock *Succ = BB->getSingleSuccessor();
+
+ for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+ if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+ PHI = cast<PHINode>(I);
+ if (!AlternativeV)
+ break;
+
+ assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+ auto PredI = pred_begin(Succ);
+ BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+ if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+ break;
+ PHI = nullptr;
+ }
+ if (PHI)
+ return PHI;
+
+ // If V is not an instruction defined in BB, just return it.
+ if (!AlternativeV &&
+ (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+ return V;
+
+ PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+ PHI->addIncoming(V, BB);
+ for (BasicBlock *PredBB : predecessors(Succ))
+ if (PredBB != BB)
+ PHI->addIncoming(
+ AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
+ return PHI;
+}
+
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+ BasicBlock *QTB, BasicBlock *QFB,
+ BasicBlock *PostBB, Value *Address,
+ bool InvertPCond, bool InvertQCond) {
+ auto IsaBitcastOfPointerType = [](const Instruction &I) {
+ return Operator::getOpcode(&I) == Instruction::BitCast &&
+ I.getType()->isPointerTy();
+ };
+
+ // If we're not in aggressive mode, we only optimize if we have some
+ // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+ auto IsWorthwhile = [&](BasicBlock *BB) {
+ if (!BB)
+ return true;
+ // Heuristic: if the block can be if-converted/phi-folded and the
+ // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+ // thread this store.
+ unsigned N = 0;
+ for (auto &I : *BB) {
+ // Cheap instructions viable for folding.
+ if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I))
+ ++N;
+ // Free instructions.
+ else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ IsaBitcastOfPointerType(I))
+ continue;
+ else
+ return false;
+ }
+ return N <= PHINodeFoldingThreshold;
+ };
+
+ if (!MergeCondStoresAggressively &&
+ (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) ||
+ !IsWorthwhile(QFB)))
+ return false;
+
+ // For every pointer, there must be exactly two stores, one coming from
+ // PTB or PFB, and the other from QTB or QFB. We don't support more than one
+ // store (to any address) in PTB,PFB or QTB,QFB.
+ // FIXME: We could relax this restriction with a bit more work and performance
+ // testing.
+ StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
+ StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
+ if (!PStore || !QStore)
+ return false;
+
+ // Now check the stores are compatible.
+ if (!QStore->isUnordered() || !PStore->isUnordered())
+ return false;
+
+ // Check that sinking the store won't cause program behavior changes. Sinking
+ // the store out of the Q blocks won't change any behavior as we're sinking
+ // from a block to its unconditional successor. But we're moving a store from
+ // the P blocks down through the middle block (QBI) and past both QFB and QTB.
+ // So we need to check that there are no aliasing loads or stores in
+ // QBI, QTB and QFB. We also need to check there are no conflicting memory
+ // operations between PStore and the end of its parent block.
+ //
+ // The ideal way to do this is to query AliasAnalysis, but we don't
+ // preserve AA currently so that is dangerous. Be super safe and just
+ // check there are no other memory operations at all.
+ for (auto &I : *QFB->getSinglePredecessor())
+ if (I.mayReadOrWriteMemory())
+ return false;
+ for (auto &I : *QFB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ if (QTB)
+ for (auto &I : *QTB)
+ if (&I != QStore && I.mayReadOrWriteMemory())
+ return false;
+ for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
+ I != E; ++I)
+ if (&*I != PStore && I->mayReadOrWriteMemory())
+ return false;
+
+ // OK, we're going to sink the stores to PostBB. The store has to be
+ // conditional though, so first create the predicate.
+ Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+ Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+ ->getCondition();
+
+ Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
+ PStore->getParent());
+ Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
+ QStore->getParent(), PPHI);
+
+ IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+
+ Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+ Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+ if (InvertPCond)
+ PPred = QB.CreateNot(PPred);
+ if (InvertQCond)
+ QPred = QB.CreateNot(QPred);
+ Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+ auto *T =
+ SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+ QB.SetInsertPoint(T);
+ StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
+ AAMDNodes AAMD;
+ // Combine the AA metadata of both original stores; the merged store is only
+ // as precise as their intersection.
+ PStore->getAAMetadata(AAMD, /*Merge=*/false);
+ QStore->getAAMetadata(AAMD, /*Merge=*/true);
+ SI->setAAMetadata(AAMD);
+
+ QStore->eraseFromParent();
+ PStore->eraseFromParent();
+
+ return true;
+}
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
+ // The intention here is to find diamonds or triangles (see below) where each
+ // conditional block contains a store to the same address. Both of these
+ // stores are conditional, so they can't be unconditionally sunk. But it may
+ // be profitable to speculatively sink the stores into one merged store at the
+ // end, and predicate the merged store on the union of the two conditions of
+ // PBI and QBI.
+ //
+ // This can reduce the number of stores executed if both of the conditions are
+ // true, and can allow the blocks to become small enough to be if-converted.
+ // This optimization will also chain, so that ladders of test-and-set
+ // sequences can be if-converted away.
+ //
+ // We only deal with simple diamonds or triangles:
+ //
+ // PBI or PBI or a combination of the two
+ // / \ | \
+ // PTB PFB | PFB
+ // \ / | /
+ // QBI QBI
+ // / \ | \
+ // QTB QFB | QFB
+ // \ / | /
+ // PostBB PostBB
+ //
+ // We model triangles as a type of diamond with a nullptr "true" block.
+ // Triangles are canonicalized so that the fallthrough edge is represented by
+ // a true condition, as in the diagram above.
+ //
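+ // For illustration (hypothetical names), a conditional
+ //   store i32 %a, i32* %p    ; in PTB or PFB
+ // and a conditional
+ //   store i32 %b, i32* %p    ; in QTB or QFB
+ // are replaced by a single store in a new block guarded by the 'or' of the
+ // two (possibly negated) conditions, with the stored value PHI-merged along
+ // the incoming paths.
+ //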
+ BasicBlock *PTB = PBI->getSuccessor(0);
+ BasicBlock *PFB = PBI->getSuccessor(1);
+ BasicBlock *QTB = QBI->getSuccessor(0);
+ BasicBlock *QFB = QBI->getSuccessor(1);
+ BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+ // Make sure we have a good guess for PostBB. If QTB's only successor is
+ // QFB, then QFB is a better PostBB.
+ if (QTB->getSingleSuccessor() == QFB)
+ PostBB = QFB;
+
+ // If we couldn't find a good PostBB, stop.
+ if (!PostBB)
+ return false;
+
+ bool InvertPCond = false, InvertQCond = false;
+ // Canonicalize fallthroughs to the true branches.
+ if (PFB == QBI->getParent()) {
+ std::swap(PFB, PTB);
+ InvertPCond = true;
+ }
+ if (QFB == PostBB) {
+ std::swap(QFB, QTB);
+ InvertQCond = true;
+ }
+
+ // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+ // and QFB may not. Model fallthroughs as a nullptr block.
+ if (PTB == QBI->getParent())
+ PTB = nullptr;
+ if (QTB == PostBB)
+ QTB = nullptr;
+
+ // Legality bailouts. We must have at least the non-fallthrough blocks and
+ // the post-dominating block, and the non-fallthroughs must each have
+ // exactly one predecessor and one successor.
+ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
+ return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
+ };
+ if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
+ return false;
+ if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
+ (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
+ return false;
+ if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2))
+ return false;
+
+ // OK, this is a sequence of two diamonds or triangles.
+ // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
+ SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
+ for (auto *BB : {PTB, PFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ PStoreAddresses.insert(SI->getPointerOperand());
+ }
+ for (auto *BB : {QTB, QFB}) {
+ if (!BB)
+ continue;
+ for (auto &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ QStoreAddresses.insert(SI->getPointerOperand());
+ }
+
+ set_intersect(PStoreAddresses, QStoreAddresses);
+ // set_intersect mutates PStoreAddresses in place. Rename it here to make it
+ // clear what it contains.
+ auto &CommonAddresses = PStoreAddresses;
+
+ bool Changed = false;
+ for (auto *Address : CommonAddresses)
+ Changed |= mergeConditionalStoreToAddress(
+ PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond);
+ return Changed;
+}
+
+/// If we have a conditional branch as a predecessor of another block,
+/// this function tries to simplify it. We know
+/// that PBI and BI are both conditional branches, and BI is in one of the
+/// successor blocks of PBI - PBI branches to BI.
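+/// When the fold applies, PBI is rewritten to branch on an 'or' of the two
+/// (possibly negated) conditions, its true edge is pointed at the common
+/// destination, and any disagreeing PHI entries in that destination are
+/// reconciled with select instructions.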
+static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
+ const DataLayout &DL) {
+ assert(PBI->isConditional() && BI->isConditional());
+ BasicBlock *BB = BI->getParent();
+
+ // If this block ends with a branch instruction, and if there is a
+ // predecessor that ends with a branch on the same condition, make
+ // this conditional branch redundant.
+ if (PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ // Okay, the outcome of this conditional branch is statically
+ // knowable. If this block had a single pred, handle specially.
+ if (BB->getSinglePredecessor()) {
+ // Turn this into a branch on constant.
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ BI->setCondition(
+ ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
+ return true; // Nuke the branch on constant.
+ }
+
+ // Otherwise, if there are multiple predecessors, insert a PHI that merges
+ // in the constant and simplify the block result. Subsequent passes of
+ // simplifycfg will thread the block.
+ if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ PHINode *NewPN = PHINode::Create(
+ Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
+ BI->getCondition()->getName() + ".pr", &BB->front());
+ // Okay, we're going to insert the PHI node. Since PBI is not the only
+ // predecessor, compute the PHI'd conditional value for all of the preds.
+ // Any predecessor where the condition is not computable we keep symbolic.
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
+ PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ bool CondIsTrue = PBI->getSuccessor(0) == BB;
+ NewPN->addIncoming(
+ ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
+ P);
+ } else {
+ NewPN->addIncoming(BI->getCondition(), P);
+ }
+ }
+
+ BI->setCondition(NewPN);
+ return true;
+ }
+ }
+
+ if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
+ if (CE->canTrap())
+ return false;
+
+ // If both branches are conditional and both contain stores to the same
+ // address, remove the stores from the conditionals and create a conditional
+ // merged store at the end.
+ if (MergeCondStores && mergeConditionalStores(PBI, BI))
+ return true;
+
+ // If this is a conditional branch in an empty block, and if any
+ // predecessors are a conditional branch to one of our destinations,
+ // fold the conditions into logical ops and one cond br.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (&*BBI != BI)
+ return false;
+
+ int PBIOp, BIOp;
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+ PBIOp = 0;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+ PBIOp = 0;
+ BIOp = 1;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+ PBIOp = 1;
+ BIOp = 0;
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+ PBIOp = 1;
+ BIOp = 1;
+ } else {
+ return false;
+ }
+
+ // Check to make sure that the other destination of this branch
+ // isn't BB itself. If so, this is an infinite loop that will
+ // keep getting unwound.
+ if (PBI->getSuccessor(PBIOp) == BB)
+ return false;
+
+ // Do not perform this transformation if it would require
+ // insertion of a large number of select instructions. For targets
+ // without predication/cmovs, this is a big pessimization.
+
+ // Also do not perform this transformation if any phi node in the common
+ // destination block can trap when reached by BB or PBB (PR17073). In that
+ // case, it would be unsafe to hoist the operation into a select instruction.
+
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+ unsigned NumPhis = 0;
+ for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
+ ++II, ++NumPhis) {
+ if (NumPhis > 2) // Disable this xform.
+ return false;
+
+ PHINode *PN = cast<PHINode>(II);
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
+ if (CE->canTrap())
+ return false;
+
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
+ if (CE->canTrap())
+ return false;
+ }
+
+ // Finally, if everything is ok, fold the branches to logical ops.
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
+ << "AND: " << *BI->getParent());
+
+ // If OtherDest *is* BB, then BB is a basic block with a single conditional
+ // branch in it, where one edge (OtherDest) goes back to itself but the other
+ // exits. We don't *know* that the program avoids the infinite loop
+ // (even though that seems likely). If we do this xform naively, we'll end up
+ // recursively unpeeling the loop. Since we know that (after the xform is
+ // done) that the block *is* infinite if reached, we just make it an obviously
+ // infinite loop with no cond branch.
+ if (OtherDest == BB) {
+ // Insert it at the end of the function, because it's either dead code,
+ // or its placement won't matter if it's hot. :)
+ BasicBlock *InfLoopBlock =
+ BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
+ BranchInst::Create(InfLoopBlock, InfLoopBlock);
+ OtherDest = InfLoopBlock;
+ }
+
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // BI may have other predecessors. Because of this, we leave
+ // it alone, but modify PBI.
+
+ // Make sure we get to CommonDest on True&True directions.
+ Value *PBICond = PBI->getCondition();
+ IRBuilder<NoFolder> Builder(PBI);
+ if (PBIOp)
+ PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
+
+ Value *BICond = BI->getCondition();
+ if (BIOp)
+ BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
+
+ // Merge the conditions.
+ Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+
+ // Modify PBI to branch on the new condition to the new dests.
+ PBI->setCondition(Cond);
+ PBI->setSuccessor(0, CommonDest);
+ PBI->setSuccessor(1, OtherDest);
+
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
+ bool HasWeights =
+ extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
+ SuccTrueWeight, SuccFalseWeight);
+ if (HasWeights) {
+ PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon,
+ PredOther * SuccOther};
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
+ }
+
+ // OtherDest may have phi nodes. If so, add an entry from PBI's
+ // block that is identical to the entry for BI's block.
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
+
+ // We know that the CommonDest already had an edge from PBI to
+ // it. If it has PHIs though, the PHIs may have different
+ // entries for BB and PBI's BB. If so, insert a select to make
+ // them agree.
+ PHINode *PN;
+ for (BasicBlock::iterator II = CommonDest->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (BIV != PBIV) {
+ // Insert a select in PBI to pick the right value.
+ SelectInst *NV = cast<SelectInst>(
+ Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
+ PN->setIncomingValue(PBBIdx, NV);
+ // Although the select has the same condition as PBI, the original branch
+ // weights for PBI do not apply to the new select because the select's
+ // 'logical' edges are incoming edges of the phi that is eliminated, not
+ // the outgoing edges of PBI.
+ if (HasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to PredCommonDest should be PredCommon * SuccTotal.
+ // The weight to PredOtherDest should be PredOther * SuccCommon.
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
+ PredOther * SuccCommon};
+
+ FitWeights(NewWeights);
+
+ NV->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
+
+ // This basic block is probably dead. We know it has at least
+ // one fewer predecessor.
+ return true;
+}
+
+// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
+// true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
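+// Used by SimplifySwitchOnSelect and SimplifyIndirectBrOnSelect below.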
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight) {
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
+
+ // Then remove the rest.
+ for (BasicBlock *Succ : OldTerm->successors()) {
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = nullptr;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = nullptr;
+ else
+ Succ->removePredecessor(OldTerm->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
+ }
+
+ IRBuilder<> Builder(OldTerm);
+ Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
+
+ // Insert an appropriate new terminator.
+ if (!KeepEdge1 && !KeepEdge2) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ Builder.CreateBr(TrueBB);
+ else {
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(OldTerm->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
+ }
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks was a successor, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (!KeepEdge1)
+ // Only TrueBB was found.
+ Builder.CreateBr(TrueBB);
+ else
+ // Only FalseBB was found.
+ Builder.CreateBr(FalseBB);
+ }
+
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
+
+// Replaces
+// (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+ // Check for constant integer values in the select.
+ ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+ ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+ if (!TrueVal || !FalseVal)
+ return false;
+
+ // Find the relevant condition and destinations.
+ Value *Condition = Select->getCondition();
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
+
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight =
+ (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
+ FalseWeight =
+ (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
+ }
+ }
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
+ FalseWeight);
+}
+
+// Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
+
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
+
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
+ 0);
+}
+
+/// This is called when we find an icmp instruction
+/// (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but then the
+/// default value goes to an uncond block with a seteq in it, and we get
+/// something like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
+ const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
+ AssumptionCache *AC) {
+ BasicBlock *BB = ICI->getParent();
+
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
+ return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
+ return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
+ if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
+
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
+
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
+
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB =
+ BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0] + 1) >> 1;
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).createBranchWeights(MDWeights));
+ }
+ }
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ Builder.SetInsertPoint(NewBB);
+ Builder.SetCurrentDebugLocation(SI->getDebugLoc());
+ Builder.CreateBr(SuccBlock);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
+
+/// The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
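+ //
+ // For illustration (hypothetical values):
+ //   br i1 (or (icmp eq i32 %X, 0), (icmp eq i32 %X, 1)), label %T, label %F
+ // becomes
+ //   switch i32 %X, label %F [ i32 0, label %T
+ //                             i32 1, label %T ]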
+
+ // Try to gather values from a chain of and/or to be turned into a switch
+ ConstantComparesGatherer ConstantCompare(Cond, DL);
+ // Unpack the result
+ SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
+ Value *CompVal = ConstantCompare.CompValue;
+ unsigned UsedICmps = ConstantCompare.UsedICmps;
+ Value *ExtraCase = ConstantCompare.Extra;
+
+ // If we didn't find a value that is compared against multiple constants, fail.
+ if (!CompVal)
+ return false;
+
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
+
+ bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
+
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle; remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a conditional branch.
+ if (ExtraCase && Values.size() < 2)
+ return false;
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual)
+ std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n"
+ << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+ Builder.SetInsertPoint(OldTI);
+
+ if (TrueWhenEqual)
+ Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
+ else
+ Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ Builder.SetInsertPoint(BI);
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ CompVal = Builder.CreatePtrToInt(
+ CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ if (isa<PHINode>(RI->getValue()))
+ return SimplifyCommonResume(RI);
+ else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
+ RI->getValue() == RI->getParent()->getFirstNonPHI())
+ // The resume must unwind the exception that caused control to branch here.
+ return SimplifySingleResume(RI);
+
+ return false;
+}
+
+// Simplify resume that is shared by several landing pads (phi of landing pad).
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+
+ // Check that there are no other instructions except for debug intrinsics
+ // between the phi of landing pads (RI->getValue()) and the resume
+ // instruction.
+ BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
+ E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
+ auto *PhiLPInst = cast<PHINode>(RI->getValue());
+
+ // Check incoming blocks to see if any of them are trivial.
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
+ Idx++) {
+ auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
+ auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
+
+ // If the block has other successors, we cannot delete it because
+ // it has other dependents.
+ if (IncomingBB->getUniqueSuccessor() != BB)
+ continue;
+
+ auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ // Not the landing pad that caused the control to branch here.
+ if (IncomingValue != LandingPad)
+ continue;
+
+ bool isTrivial = true;
+
+ I = IncomingBB->getFirstNonPHI()->getIterator();
+ E = IncomingBB->getTerminator()->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ isTrivial = false;
+ break;
+ }
+
+ if (isTrivial)
+ TrivialUnwindBlocks.insert(IncomingBB);
+ }
+
+ // If no trivial unwind blocks, don't do any simplifications.
+ if (TrivialUnwindBlocks.empty())
+ return false;
+
+ // Turn all invokes that unwind here into calls.
+ for (auto *TrivialBB : TrivialUnwindBlocks) {
+ // Blocks that will be simplified should be removed from the phi node.
+ // Note there could be multiple edges to the resume block, and we need
+ // to remove them all.
+ while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
+ BB->removePredecessor(TrivialBB, true);
+
+ for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
+ PI != PE;) {
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
+ }
+
+ // In each SimplifyCFG run, only the currently processed block can be
+ // erased; erasing anything else would break the pass's iteration. So
+ // instead of erasing TrivialBB, we only remove its branch to the common
+ // resume block, so that we can later erase the resume block once it has
+ // no predecessors.
+ TrivialBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(RI->getContext(), TrivialBB);
+ }
+
+ // Delete the resume block if all its predecessors have been removed.
+ if (pred_empty(BB))
+ BB->eraseFromParent();
+
+ return !TrivialUnwindBlocks.empty();
+}
+
+// Simplify resume that is only used by a single (non-phi) landing pad.
+bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+ LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ assert(RI->getValue() == LPInst &&
+ "Resume must unwind the exception that caused control to here");
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
+ }
+
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
+ return true;
+}
+
+static bool removeEmptyCleanup(CleanupReturnInst *RI) {
+ // If this is a trivial cleanup pad that executes no instructions, it can be
+ // eliminated. If the cleanup pad continues to the caller, any predecessor
+ // that is an EH pad will be updated to continue to the caller and any
+ // predecessor that terminates with an invoke instruction will have its invoke
+ // instruction converted to a call instruction. If the cleanup pad being
+ // simplified does not continue to the caller, each predecessor will be
+ // updated to continue to the unwind destination of the cleanup pad being
+ // simplified.
+ BasicBlock *BB = RI->getParent();
+ CleanupPadInst *CPInst = RI->getCleanupPad();
+ if (CPInst->getParent() != BB)
+ // This isn't an empty cleanup.
+ return false;
+
+ // We cannot kill the pad if it has multiple uses. This typically arises
+ // from unreachable basic blocks.
+ if (!CPInst->hasOneUse())
+ return false;
+
+ // Check that there are no other instructions except for benign intrinsics.
+ BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
+ while (++I != E) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::lifetime_end:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ // If the cleanup return we are simplifying unwinds to the caller, this will
+ // set UnwindDest to nullptr.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
+
+ // We're about to remove BB from the control flow. Before we do, sink any
+ // PHINodes into the unwind destination. Doing this before changing the
+ // control flow avoids some potentially slow checks, since we can currently
+ // be certain that UnwindDest and BB have no common predecessors (since they
+ // are both EH pads).
+ if (UnwindDest) {
+ // First, go through the PHI nodes in UnwindDest and update any nodes that
+ // reference the block we are removing
+ for (BasicBlock::iterator I = UnwindDest->begin(),
+ IE = DestEHPad->getIterator();
+ I != IE; ++I) {
+ PHINode *DestPN = cast<PHINode>(I);
+
+ int Idx = DestPN->getBasicBlockIndex(BB);
+ // Since BB unwinds to UnwindDest, it has to be in the PHI node.
+ assert(Idx != -1);
+ // This PHI node has an incoming value that corresponds to a control
+ // path through the cleanup pad we are removing. If the incoming
+ // value is in the cleanup pad, it must be a PHINode (because we
+ // verified above that the block is otherwise empty). Otherwise, the
+ // value is either a constant or a value that dominates the cleanup
+ // pad being removed.
+ //
+ // Because BB and UnwindDest are both EH pads, all of their
+ // predecessors must unwind to these blocks, and since no instruction
+ // can have multiple unwind destinations, there will be no overlap in
+ // incoming blocks between SrcPN and DestPN.
+ Value *SrcVal = DestPN->getIncomingValue(Idx);
+ PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
+
+ // Remove the entry for the block we are deleting.
+ DestPN->removeIncomingValue(Idx, false);
+
+ if (SrcPN && SrcPN->getParent() == BB) {
+ // If the incoming value was a PHI node in the cleanup pad we are
+ // removing, we need to merge that PHI node's incoming values into
+ // DestPN.
+ for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
+ SrcIdx != SrcE; ++SrcIdx) {
+ DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
+ SrcPN->getIncomingBlock(SrcIdx));
+ }
+ } else {
+ // Otherwise, the incoming value came from above BB and
+ // so we can just reuse it. We must associate all of BB's
+ // predecessors with this value.
+ for (auto *pred : predecessors(BB)) {
+ DestPN->addIncoming(SrcVal, pred);
+ }
+ }
+ }
+
+ // Sink any remaining PHI nodes directly into UnwindDest.
+ Instruction *InsertPt = DestEHPad;
+ for (BasicBlock::iterator I = BB->begin(),
+ IE = BB->getFirstNonPHI()->getIterator();
+ I != IE;) {
+ // The iterator must be incremented here because the instructions are
+ // being moved to another block.
+ PHINode *PN = cast<PHINode>(I++);
+ if (PN->use_empty())
+ // If the PHI node has no uses, just leave it. It will be erased
+ // when we erase BB below.
+ continue;
+
+ // Otherwise, sink this PHI node into UnwindDest.
+ // Any predecessors to UnwindDest which are not already represented
+ // must be back edges which inherit the value from the path through
+ // BB. In this case, the PHI value must reference itself.
+ for (auto *pred : predecessors(UnwindDest))
+ if (pred != BB)
+ PN->addIncoming(PN, pred);
+ PN->moveBefore(InsertPt);
+ }
+ }
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ // The iterator must be updated here because we are removing this pred.
+ BasicBlock *PredBB = *PI++;
+ if (UnwindDest == nullptr) {
+ removeUnwindEdge(PredBB);
+ } else {
+ TerminatorInst *TI = PredBB->getTerminator();
+ TI->replaceUsesOfWith(BB, UnwindDest);
+ }
+ }
+
+ // The cleanup pad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
+// Try to merge two cleanuppads together.
+static bool mergeCleanupPad(CleanupReturnInst *RI) {
+ // Skip any cleanuprets which unwind to caller, there is nothing to merge
+ // with.
+ BasicBlock *UnwindDest = RI->getUnwindDest();
+ if (!UnwindDest)
+ return false;
+
+ // If this cleanupret isn't the only predecessor of this cleanuppad, it
+ // wouldn't be safe to merge without code duplication.
+ if (UnwindDest->getSinglePredecessor() != RI->getParent())
+ return false;
+
+ // Verify that our cleanuppad's unwind destination is another cleanuppad.
+ auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
+ if (!SuccessorCleanupPad)
+ return false;
+
+ CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
+ // Replace any uses of the successor cleanuppad with the predecessor pad.
+ // The only uses of the successor cleanuppad should be its cleanupret and
+ // funclet bundle operands.
+ SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
+ // Remove the old cleanuppad.
+ SuccessorCleanupPad->eraseFromParent();
+ // Now, we simply replace the cleanupret with a branch to the unwind
+ // destination.
+ BranchInst::Create(UnwindDest, RI->getParent());
+ RI->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
+ // It is possible to transiently have an undef cleanuppad operand because we
+ // have deleted some, but not all, dead blocks.
+ // Eventually, this block will be deleted.
+ if (isa<UndefValue>(RI->getOperand(0)))
+ return false;
+
+ if (mergeCleanupPad(RI))
+ return true;
+
+ if (removeEmptyCleanup(RI))
+ return true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator())
+ return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock *, 8> UncondBranchPreds;
+ SmallVector<BranchInst *, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_empty(BB)) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
+ }
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI, Builder))
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI->getIterator() != BB->begin()) {
+ BasicBlock::iterator BBI = UI->getIterator();
+ --BBI;
+ // Do not delete instructions that can have side effects which might cause
+ // the unreachable to not be reachable; specifically, calls and volatile
+ // operations may have this effect.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))
+ break;
+
+ if (BBI->mayHaveSideEffects()) {
+ if (auto *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->isVolatile())
+ break;
+ } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->isVolatile())
+ break;
+ } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ if (RMWI->isVolatile())
+ break;
+ } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ if (CXI->isVolatile())
+ break;
+ } else if (isa<CatchPadInst>(BBI)) {
+ // A catchpad may invoke exception object constructors and such, which
+ // in some languages can be arbitrary code, so be conservative by
+ // default.
+ // For CoreCLR, it just involves a type test, so can be removed.
+ if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
+ EHPersonality::CoreCLR)
+ break;
+ } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+ !isa<LandingPadInst>(BBI)) {
+ break;
+ }
+ // Note that deleting LandingPad's here is in fact okay, although it
+ // involves a bit of subtle reasoning. If this inst is a LandingPad,
+ // all the predecessors of this block will be the unwind edges of Invokes,
+ // and we can therefore guarantee this block will be erased.
+ }
+
+ // Delete this instruction (any uses are guaranteed to be dead)
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI)
+ return Changed;
+
+ SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+ IRBuilder<> Builder(TI);
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ Builder.CreateBr(BI->getSuccessor(1));
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ } else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateBr(BI->getSuccessor(0));
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ if (i->getCaseSuccessor() != BB) {
+ ++i;
+ continue;
+ }
+ BB->removePredecessor(SI->getParent());
+ i = SI->removeCase(i);
+ e = SI->case_end();
+ Changed = true;
+ }
+ } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ }
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CSI->getUnwindDest() == BB) {
+ removeUnwindEdge(TI->getParent());
+ Changed = true;
+ continue;
+ }
+
+ for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
+ E = CSI->handler_end();
+ I != E; ++I) {
+ if (*I == BB) {
+ CSI->removeHandler(I);
+ --I;
+ --E;
+ Changed = true;
+ }
+ }
+ if (CSI->getNumHandlers() == 0) {
+ BasicBlock *CatchSwitchBB = CSI->getParent();
+ if (CSI->hasUnwindDest()) {
+ // Redirect preds to the unwind dest
+ CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
+ } else {
+ // Rewrite all preds to unwind to caller (or from invoke to call).
+ SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
+ for (BasicBlock *EHPred : EHPreds)
+ removeUnwindEdge(EHPred);
+ }
+ // The catchswitch is no longer reachable.
+ new UnreachableInst(CSI->getContext(), CSI);
+ CSI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (isa<CleanupReturnInst>(TI)) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ if (LoopHeaders)
+ LoopHeaders->erase(BB);
+ return true;
+ }
+
+ return Changed;
+}
+
+static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
+ assert(Cases.size() >= 1);
+
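+ // ConstantIntSortPredicate sorts the values in descending order, so the
+ // cases are contiguous iff each value is exactly one greater than the
+ // value that follows it.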
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (size_t I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
+ return false;
+ }
+ return true;
+}
+
+/// Turn a switch with two reachable destinations into an integer range
+/// comparison and branch.
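+///
+/// For illustration (hypothetical names and values):
+///   switch i32 %x, label %B [ i32 3, label %A
+///                             i32 4, label %A
+///                             i32 5, label %A ]
+/// becomes
+///   %x.off = add i32 %x, -3
+///   %switch = icmp ult i32 %x.off, 3
+///   br i1 %switch, label %A, label %B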
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ // Partition the cases into two sets with different destinations.
+ BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
+ BasicBlock *DestB = nullptr;
+ SmallVector<ConstantInt *, 16> CasesA;
+ SmallVector<ConstantInt *, 16> CasesB;
+
+ for (auto Case : SI->cases()) {
+ BasicBlock *Dest = Case.getCaseSuccessor();
+ if (!DestA)
+ DestA = Dest;
+ if (Dest == DestA) {
+ CasesA.push_back(Case.getCaseValue());
+ continue;
+ }
+ if (!DestB)
+ DestB = Dest;
+ if (Dest == DestB) {
+ CasesB.push_back(Case.getCaseValue());
+ continue;
+ }
+ return false; // More than two destinations.
+ }
+
+ assert(DestA && DestB &&
+ "Single-destination switch should have been folded.");
+ assert(DestA != DestB);
+ assert(DestB != SI->getDefaultDest());
+ assert(!CasesB.empty() && "There must be non-default cases.");
+ assert(!CasesA.empty() || HasDefault);
+
+ // Figure out if one of the sets of cases form a contiguous range.
+ SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
+ BasicBlock *ContiguousDest = nullptr;
+ BasicBlock *OtherDest = nullptr;
+ if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
+ ContiguousCases = &CasesA;
+ ContiguousDest = DestA;
+ OtherDest = DestB;
+ } else if (CasesAreContiguous(CasesB)) {
+ ContiguousCases = &CasesB;
+ ContiguousDest = DestB;
+ OtherDest = DestA;
+ } else
+ return false;
+
+ // Start building the compare and branch.
+
+ Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
+ Constant *NumCases =
+ ConstantInt::get(Offset->getType(), ContiguousCases->size());
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
+
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && !ContiguousCases->empty())
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
+
+ // Update weight for the newly-created conditional branch.
+ if (HasBranchWeights(SI)) {
+ SmallVector<uint64_t, 8> Weights;
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ uint64_t TrueWeight = 0;
+ uint64_t FalseWeight = 0;
+ for (size_t I = 0, E = Weights.size(); I != E; ++I) {
+ if (SI->getSuccessor(I) == ContiguousDest)
+ TrueWeight += Weights[I];
+ else
+ FalseWeight += Weights[I];
+ }
+ while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
+ TrueWeight /= 2;
+ FalseWeight /= 2;
+ }
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext())
+ .createBranchWeights((uint32_t)TrueWeight,
+ (uint32_t)FalseWeight));
+ }
+ }
+
+ // Prune obsolete incoming values off the successors' PHI nodes.
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = ContiguousCases->size();
+ if (ContiguousDest == SI->getDefaultDest())
+ ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
+ if (OtherDest == SI->getDefaultDest())
+ ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+
+ // Drop the switch.
+ SI->eraseFromParent();
+
+ return true;
+}
+
+/// Compute masked bits for the condition of a switch
+/// and use it to remove dead cases.
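+///
+/// For illustration (hypothetical values): if the low bit of the condition
+/// is known to be zero, every case with an odd value is provably dead and
+/// its edge can be removed.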
+static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+ const DataLayout &DL) {
+ Value *Cond = SI->getCondition();
+ unsigned Bits = Cond->getType()->getIntegerBitWidth();
+ KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
+
+ // We can also eliminate cases by determining that their values are outside of
+ // the limited range of the condition based on how many significant (non-sign)
+ // bits are in the condition value.
+ unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
+ unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
+
+ // Gather dead cases.
+ SmallVector<ConstantInt *, 8> DeadCases;
+ for (auto &Case : SI->cases()) {
+ const APInt &CaseVal = Case.getCaseValue()->getValue();
+ if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
+ (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
+ DeadCases.push_back(Case.getCaseValue());
+ DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
+ }
+ }
+
+ // If we can prove that the cases must cover all possible values, the
+ // default destination becomes dead and we can remove it. If we know some
+ // of the bits in the value, we can use that to more precisely compute the
+ // number of possible unique case values.
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const unsigned NumUnknownBits =
+ Bits - (Known.Zero | Known.One).countPopulation();
+ assert(NumUnknownBits <= Bits);
+ if (HasDefault && DeadCases.empty() &&
+ NumUnknownBits < 64 /* avoid overflow */ &&
+ SI->getNumCases() == (1ULL << NumUnknownBits)) {
+ DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ BasicBlock *NewDefault =
+ SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
+ SI->setDefaultDest(&*NewDefault);
+ SplitBlock(&*NewDefault, &NewDefault->front());
+ auto *OldTI = NewDefault->getTerminator();
+ new UnreachableInst(SI->getContext(), OldTI);
+ EraseTerminatorInstAndDCECond(OldTI);
+ return true;
+ }
+
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeight = HasBranchWeights(SI);
+ if (HasWeight) {
+ GetBranchWeights(SI, Weights);
+ HasWeight = (Weights.size() == 1 + SI->getNumCases());
+ }
+
+ // Remove dead cases from the switch.
+ for (ConstantInt *DeadCase : DeadCases) {
+ SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
+ assert(CaseI != SI->case_default() &&
+ "Case was not found. Probably mistake in DeadCases forming.");
+ if (HasWeight) {
+ std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
+ Weights.pop_back();
+ }
+
+ // Prune unused values from PHI nodes.
+ CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
+ SI->removeCase(CaseI);
+ }
+ if (HasWeight && Weights.size() >= 2) {
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext())
+ .createBranchWeights(MDWeights));
+ }
+
+ return !DeadCases.empty();
+}
+
+/// If BB would be eligible for simplification by
+/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB, int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return nullptr; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return nullptr; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return nullptr; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue)
+ continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return nullptr;
+}
+
+/// Try to forward the condition of a switch instruction to a phi node
+/// dominated by the switch, if that would mean that some of the destination
+/// blocks of the switch can be folded away.
+/// Returns true if a change is made.
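+///
+/// For illustration (hypothetical values): if "case 5" branches to an empty
+/// block whose successor PHI has incoming value 5 from that block, the 5 can
+/// be replaced by the switch condition itself (the code below requires at
+/// least two such forwardable cases before rewriting).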
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (auto Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseDest = Case.getCaseSuccessor();
+
+ int PhiIndex;
+ PHINode *PHI =
+ FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex);
+ if (!PHI)
+ continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end();
+ I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVectorImpl<int> &Indexes = I->second;
+
+ if (Indexes.size() < 2)
+ continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
+ if (C->isThreadDependent())
+ return false;
+ if (C->isDLLImportDependent())
+ return false;
+
+ if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
+ !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
+ !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+ if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
+ return false;
+ }
+
+ if (!TTI.shouldBuildLookupTablesForConstant(C))
+ return false;
+
+ return true;
+}
+
+/// If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning nullptr if it's not there.
+static Constant *
+LookupConstant(Value *V,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C;
+ return ConstantPool.lookup(V);
+}
+
+/// Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, nullptr otherwise.
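+///
+/// For illustration (hypothetical names): with %x mapped to i32 7 in
+/// ConstantPool, "%y = add i32 %x, 1" folds to i32 8.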
+static Constant *
+ConstantFold(Instruction *I, const DataLayout &DL,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+ if (!A)
+ return nullptr;
+ if (A->isAllOnesValue())
+ return LookupConstant(Select->getTrueValue(), ConstantPool);
+ if (A->isNullValue())
+ return LookupConstant(Select->getFalseValue(), ConstantPool);
+ return nullptr;
+ }
+
+ SmallVector<Constant *, 4> COps;
+ for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
+ if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
+ COps.push_back(A);
+ else
+ return nullptr;
+ }
+
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
+ COps[1], DL);
+ }
+
+ return ConstantFoldInstOperands(I, COps, DL);
+}
+
+/// Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destinations CaseDest corresponding to value CaseVal (null for the default
+/// case), of a switch instruction SI.
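+/// For example, if a case block contains only %t = add i32 %x, 1 and an
+/// unconditional branch to *CommonDest, %t is constant-folded using
+/// %x == CaseVal, and the folded value is recorded for the phi entry coming
+/// from that block.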
+static bool
+GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value *, Constant *> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1 || T->isExceptional())
+ return false;
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+
+ // If the instruction has uses outside this block or a phi node slot for
+ // the block, it is not safe to bypass the instruction since it would then
+ // no longer dominate all its uses.
+ for (auto &Use : I->uses()) {
+ User *User = Use.getUser();
+ if (Instruction *I = dyn_cast<Instruction>(User))
+ if (I->getParent() == CaseDest)
+ continue;
+ if (PHINode *Phi = dyn_cast<PHINode>(User))
+ if (Phi->getIncomingBlock(Use) == CaseDest)
+ continue;
+ return false;
+ }
+
+ ConstantPool.insert(std::make_pair(&*I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal =
+ LookupConstant(PHI->getIncomingValue(Idx), ConstantPool);
+ if (!ConstVal)
+ return false;
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal, TTI))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return Res.size() > 0;
+}
+
+// Helper function used to add CaseVal to the list of cases that generate
+// Result.
+static void MapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
+ for (auto &I : UniqueResults) {
+ if (I.first == Result) {
+ I.second.push_back(CaseVal);
+ return;
+ }
+ }
+ UniqueResults.push_back(
+ std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
+}
+
+// Helper function that initializes a map containing
+// results for the PHI node of the common destination block for a switch
+// instruction. Returns false if multiple PHI nodes have been found or if
+// there is not a common destination block for the switch.
+static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ for (auto &I : SI->cases()) {
+ ConstantInt *CaseVal = I.getCaseValue();
+
+ // Resulting value at phi nodes for this case value.
+ SwitchCaseResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
+ DL, TTI))
+ return false;
+
+ // Only one value per case is permitted
+ if (Results.size() > 1)
+ return false;
+ MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+
+ // Check the PHI consistency.
+ if (!PHI)
+ PHI = Results[0].first;
+ else if (PHI != Results[0].first)
+ return false;
+ }
+ // Find the default result value.
+ SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
+ DL, TTI);
+ // If the default value is not found abort unless the default destination
+ // is unreachable.
+ DefaultResult =
+ DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
+ if ((!DefaultResult &&
+ !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
+ return false;
+
+ return true;
+}
+
+// Helper function that checks if it is possible to transform a switch with only
+// two cases (or two cases + default) that produces a result into a select.
+// Example:
+// switch (a) {
+// case 10:                %0 = icmp eq i32 %a, 10
+//   return 10;            %1 = select i1 %0, i32 10, i32 4
+// case 20:        ---->   %2 = icmp eq i32 %a, 20
+//   return 2;             %3 = select i1 %2, i32 2, i32 %1
+// default:
+//   return 4;
+// }
+static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
+ assert(ResultVector.size() == 2 &&
+ "We should have exactly two unique results at this point");
+ // If we are selecting between only two cases, transform into a simple
+ // select, or a two-way select if the default case is reachable.
+ if (ResultVector[0].second.size() == 1 &&
+ ResultVector[1].second.size() == 1) {
+ ConstantInt *const FirstCase = ResultVector[0].second[0];
+ ConstantInt *const SecondCase = ResultVector[1].second[0];
+
+ bool DefaultCanTrigger = DefaultResult;
+ Value *SelectValue = ResultVector[1].first;
+ if (DefaultCanTrigger) {
+ Value *const ValueCompare =
+ Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
+ SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+ DefaultResult, "switch.select");
+ }
+ Value *const ValueCompare =
+ Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
+ return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
+ SelectValue, "switch.select");
+ }
+
+ return nullptr;
+}
+
+// Helper function to cleanup a switch instruction that has been converted into
+// a select, fixing up PHI nodes and basic blocks.
+static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
+ Value *SelectValue,
+ IRBuilder<> &Builder) {
+ BasicBlock *SelectBB = SI->getParent();
+ while (PHI->getBasicBlockIndex(SelectBB) >= 0)
+ PHI->removeIncomingValue(SelectBB);
+ PHI->addIncoming(SelectValue, SelectBB);
+
+ Builder.CreateBr(PHI->getParent());
+
+ // Remove the switch.
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ if (Succ == PHI->getParent())
+ continue;
+ Succ->removePredecessor(SelectBB);
+ }
+ SI->eraseFromParent();
+}
+
+/// If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with only two different
+/// constant values, replace the switch with select.
+static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ AssumptionCache *AC, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ Value *const Cond = SI->getCondition();
+ PHINode *PHI = nullptr;
+ BasicBlock *CommonDest = nullptr;
+ Constant *DefaultResult;
+ SwitchCaseResultVectorTy UniqueResults;
+ // Collect all the cases that will deliver the same value from the switch.
+ if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL, TTI))
+ return false;
+ // A select chooses between at most two values.
+ if (UniqueResults.size() != 2)
+ return false;
+ assert(PHI != nullptr && "PHI for value select not found");
+
+ Builder.SetInsertPoint(SI);
+ Value *SelectValue =
+ ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
+ if (SelectValue) {
+ RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
+ return true;
+ }
+ // The switch couldn't be converted into a select.
+ return false;
+}
+
+namespace {
+
+/// This class represents a lookup table that can be used to replace a switch.
+class SwitchLookupTable {
+public:
+ /// Create a lookup table to use as a switch replacement with the contents
+ /// of Values, using DefaultValue to fill any holes in the table.
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
+
+ /// Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
+ Type *ElementType);
+
+private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For tables where there is a linear relationship between table index
+ // and values, we calculate the result with a simple multiplication
+ // and addition instead of a table lookup.
+ LinearMapKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For LinearMapKind, these are the constants used to derive the value.
+ ConstantInt *LinearOffset;
+ ConstantInt *LinearMultiplier;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+};
+
+} // end anonymous namespace
+
+SwitchLookupTable::SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
+ : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
+ LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ Type *ValueType = Values.begin()->second->getType();
+
+ // Build up the table contents.
+ SmallVector<Constant *, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == ValueType);
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = nullptr;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ assert(DefaultValue &&
+ "Need a default value to fill the lookup table holes.");
+ assert(DefaultValue->getType() == ValueType);
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = nullptr;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // Check if we can derive the value with a linear transformation from the
+ // table index.
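+ // For example, the table {3, 5, 7, 9} has a constant distance of 2 between
+ // consecutive values, so it becomes Value = 3 + Index * 2
+ // (LinearOffset = 3, LinearMultiplier = 2) and needs no memory at all.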
+ if (isa<IntegerType>(ValueType)) {
+ bool LinearMappingPossible = true;
+ APInt PrevVal;
+ APInt DistToPrev;
+ assert(TableSize >= 2 && "Should be a SingleValue table.");
+ // Check if there is the same distance between two consecutive values.
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
+ if (!ConstVal) {
+ // This is an undef. We could deal with it, but undefs in lookup tables
+ // are very rare. It's probably not worth the additional complexity.
+ LinearMappingPossible = false;
+ break;
+ }
+ const APInt &Val = ConstVal->getValue();
+ if (I != 0) {
+ APInt Dist = Val - PrevVal;
+ if (I == 1) {
+ DistToPrev = Dist;
+ } else if (Dist != DistToPrev) {
+ LinearMappingPossible = false;
+ break;
+ }
+ }
+ PrevVal = Val;
+ }
+ if (LinearMappingPossible) {
+ LinearOffset = cast<ConstantInt>(TableContents[0]);
+ LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
+ Kind = LinearMapKind;
+ ++NumLinearMaps;
+ return;
+ }
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(DL, TableSize, ValueType)) {
+ IntegerType *IT = cast<IntegerType>(ValueType);
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+ GlobalVariable::PrivateLinkage, Initializer,
+ "switch.table." + FuncName);
+ Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case LinearMapKind: {
+ // Derive the result value from the input value.
+ Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
+ false, "switch.idx.cast");
+ if (!LinearMultiplier->isOne())
+ Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
+ if (!LinearOffset->isZero())
+ Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
+ return Result;
+ }
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
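+ // For example, four i2 elements {1, 0, 2, 3} pack into the i8 bitmap
+ // 0b11100001 (element 0 in the lowest bits); looking up Index 2 shifts
+ // right by 2 * 2 = 4 and truncates to i2, yielding the value 2.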
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(
+ ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted =
+ Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
+ }
+ case ArrayKind: {
+ // Make sure the table index will not overflow when treated as signed.
+ IntegerType *IT = cast<IntegerType>(Index->getType());
+ uint64_t TableSize =
+ Array->getInitializer()->getType()->getArrayNumElements();
+ if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
+ Index = Builder.CreateZExt(
+ Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
+ "switch.tableidx.zext");
+
+ Value *GEPIndices[] = {Builder.getInt32(0), Index};
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
+ uint64_t TableSize,
+ Type *ElementType) {
+ auto *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
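+ // For example, an 8-entry table of i8 values needs 64 bits and typically
+ // fits in a register on a 64-bit target, while a 16-entry i8 table
+ // (128 bits) does not.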
+
+ // Avoid overflow; fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX / IT->getBitWidth())
+ return false;
+ return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// Determine whether a lookup table should be built for this switch, based on
+/// the number of cases, size of the table, and the types of the results.
+static bool
+ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+
+ bool AllTablesFitInRegister = true;
+ bool HasIllegalType = false;
+ for (const auto &I : ResultTypes) {
+ Type *Ty = I.second;
+
+ // Saturate this flag to true.
+ HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+
+ // Saturate this flag to false.
+ AllTablesFitInRegister =
+ AllTablesFitInRegister &&
+ SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
+
+ // If both flags saturate, we're done. NOTE: This *only* works with
+ // saturating flags, and all flags have to saturate first due to the
+ // non-deterministic behavior of iterating over a dense map.
+ if (HasIllegalType && !AllTablesFitInRegister)
+ break;
+ }
+
+ // If each table would fit in a register, we should build it anyway.
+ if (AllTablesFitInRegister)
+ return true;
+
+ // Don't build a table that doesn't fit in-register if it has illegal types.
+ if (HasIllegalType)
+ return false;
+
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
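+ // For example, a switch with 40 cases spanning the values 0..99
+ // (TableSize == 100) yields 400 >= 400 and is accepted, while 39 cases
+ // over the same range would be rejected.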
+ return SI->getNumCases() * 10 >= TableSize * 4;
+}
+
+/// Try to reuse the switch table index compare. Following pattern:
+/// \code
+/// if (idx < tablesize)
+/// r = table[idx]; // table does not contain default_value
+/// else
+/// r = default_value;
+/// if (r != default_value)
+/// ...
+/// \endcode
+/// Is optimized to:
+/// \code
+/// cond = idx < tablesize;
+/// if (cond)
+/// r = table[idx];
+/// else
+/// r = default_value;
+/// if (cond)
+/// ...
+/// \endcode
+/// Jump threading will then eliminate the second if(cond).
+static void reuseTableCompare(
+ User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
+ Constant *DefaultValue,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
+
+ ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
+ if (!CmpInst)
+ return;
+
+ // We require that the compare is in the same block as the phi so that jump
+ // threading can do its work afterwards.
+ if (CmpInst->getParent() != PhiBlock)
+ return;
+
+ Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
+ if (!CmpOp1)
+ return;
+
+ Value *RangeCmp = RangeCheckBranch->getCondition();
+ Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
+ Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
+
+ // Check if the compare with the default value is constant true or false.
+ Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ DefaultValue, CmpOp1, true);
+ if (DefaultConst != TrueConst && DefaultConst != FalseConst)
+ return;
+
+ // Check if the compare with the case values is distinct from the default
+ // compare result.
+ for (auto ValuePair : Values) {
+ Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
+ ValuePair.second, CmpOp1, true);
+ if (!CaseConst || CaseConst == DefaultConst)
+ return;
+ assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
+ "Expect true or false as compare result.");
+ }
+
+ // Check if the branch instruction dominates the phi node. It's a simple
+ // dominance check, but sufficient for our needs.
+ // Although this check is invariant in the calling loops, it's better to do it
+ // at this late stage. Practically we do it at most once for a switch.
+ BasicBlock *BranchBlock = RangeCheckBranch->getParent();
+ for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
+ return;
+ }
+
+ if (DefaultConst == FalseConst) {
+ // The compare yields the same result. We can replace it.
+ CmpInst->replaceAllUsesWith(RangeCmp);
+ ++NumTableCmpReuses;
+ } else {
+ // The compare yields the same result, just inverted. We can replace it.
+ Value *InvertedTableCmp = BinaryOperator::CreateXor(
+ RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
+ RangeCheckBranch);
+ CmpInst->replaceAllUsesWith(InvertedTableCmp);
+ ++NumTableCmpReuses;
+ }
+}
+
+/// If the switch is only used to initialize one or more phi nodes in a common
+/// successor block with different constant values, replace the switch with
+/// lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ // Only build lookup table when we have a target that supports it.
+ if (!TTI.shouldBuildLookupTables())
+ return false;
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore switches with fewer than three cases; lookup tables will not make
+ // them faster, so we don't analyze them.
+ if (SI->getNumCases() < 3)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI->getCaseValue();
+ ConstantInt *MaxCaseVal = CI->getCaseValue();
+
+ BasicBlock *CommonDest = nullptr;
+ typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy;
+ SmallDenseMap<PHINode *, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode *, Constant *> DefaultResults;
+ SmallDenseMap<PHINode *, Type *> ResultTypes;
+ SmallVector<PHINode *, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI->getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ Results, DL, TTI))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (const auto &I : Results) {
+ PHINode *PHI = I.first;
+ Constant *Value = I.second;
+ if (!ResultLists.count(PHI))
+ PHIs.push_back(PHI);
+ ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
+ }
+ }
+
+ // Keep track of the result types.
+ for (PHINode *PHI : PHIs) {
+ ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
+ }
+
+ uint64_t NumResults = ResultLists[PHIs[0]].size();
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ bool TableHasHoles = (NumResults < TableSize);
+
+ // If the table has holes, we need a constant result for the default case
+ // or a bitmask that fits in a register.
+ SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
+ bool HasDefaultResults =
+ GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, DL, TTI);
+
+ bool NeedMask = (TableHasHoles && !HasDefaultResults);
+ if (NeedMask) {
+ // As an extra penalty for the validity test we require more cases.
+ if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+ return false;
+ if (!DL.fitsInLegalInteger(TableSize))
+ return false;
+ }
+
+ for (const auto &I : DefaultResultsList) {
+ PHINode *PHI = I.first;
+ Constant *Result = I.second;
+ DefaultResults[PHI] = Result;
+ }
+
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(
+ Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
+
+ // Compute the table index value.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex =
+ Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
+ BranchInst *RangeCheckBranch = nullptr;
+
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ // Note: We call removePredecessor later since we need to be able to get
+ // the PHI value for the default case in case we're using a bit mask.
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(
+ TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
+ RangeCheckBranch =
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ }
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+
+ if (NeedMask) {
+ // Before doing the lookup we do the hole check.
+ // The LookupBB is therefore re-purposed to do the hole check
+ // and we create a new LookupBB.
+ BasicBlock *MaskBB = LookupBB;
+ MaskBB->setName("switch.hole_check");
+ LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
+ CommonDest->getParent(), CommonDest);
+
+ // Make the mask's bitwidth at least 8 bits and a power of 2 to avoid
+ // unnecessary illegal types.
+ uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
+ APInt MaskInt(TableSizePowOf2, 0);
+ APInt One(TableSizePowOf2, 1);
+ // Build bitmask; fill in a 1 bit for every case.
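+ // For example, cases {0, 2, 3} with TableSize == 4 produce the mask 0b1101
+ // (padded to at least 8 bits); looking up index 1 then finds a 0 bit and
+ // branches to the default destination.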
+ const ResultListTy &ResultList = ResultLists[PHIs[0]];
+ for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
+ uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
+ .getLimitedValue();
+ MaskInt |= One << Idx;
+ }
+ ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
+
+ // Get the TableIndex'th bit of the bitmask.
+ // If this bit is 0 (meaning hole) jump to the default destination,
+ // else continue with table lookup.
+ IntegerType *MapTy = TableMask->getType();
+ Value *MaskIndex =
+ Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
+ Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
+ Value *LoBit = Builder.CreateTrunc(
+ Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
+ Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+
+ Builder.SetInsertPoint(LookupBB);
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
+ }
+
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
+ // do not delete them here.
+ SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
+ }
+
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+ const ResultListTy &ResultList = ResultLists[PHI];
+
+ // If using a bitmask, use any value to fill the lookup table holes.
+ Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
+ StringRef FuncName = SI->getParent()->getParent()->getName();
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+ FuncName);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
+ PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ // Do a small peephole optimization: re-use the switch table compare if
+ // possible.
+ if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
+ BasicBlock *PhiBlock = PHI->getParent();
+ // Search for compare instructions which use the phi.
+ for (auto *User : PHI->users()) {
+ reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
+ }
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ if (Succ == SI->getDefaultDest())
+ continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ if (NeedMask)
+ ++NumLookupTablesHoles;
+ return true;
+}
+
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ // See also SelectionDAGBuilder::isDense(), which this function was based on.
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ uint64_t NumCases = Values.size();
+ // 40% is the default density for building a jump table in optsize/minsize mode.
+ uint64_t MinDensity = 40;
+
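+ // For example, the cases {1, 2, 3, 9} give Range == 9 and NumCases == 4:
+ // 4 * 100 >= 9 * 40, so this switch counts as dense.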
+ return NumCases * 100 >= Range * MinDensity;
+}
+
+// Try and transform a switch that has "holes" in it to a contiguous sequence
+// of cases.
+//
+// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
+// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
+//
+// This converts a sparse switch into a dense switch which allows better
+// lowering and could also allow transforming into a lookup table.
+static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+ // Only bother with this optimization if there are more than 3 switch cases;
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // This transform is agnostic to the signedness of the input or case values. We
+ // can treat the case values as signed or unsigned. We can optimize more common
+ // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+ // as signed.
+ SmallVector<int64_t,4> Values;
+ for (auto &C : SI->cases())
+ Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+ std::sort(Values.begin(), Values.end());
+
+ // If the switch is already dense, there's nothing useful to do here.
+ if (isSwitchDense(Values))
+ return false;
+
+ // First, transform the values such that they start at zero and ascend.
+ int64_t Base = Values[0];
+ for (auto &V : Values)
+ V -= Base;
+
+ // Now we have signed numbers that have been shifted so that, given enough
+ // precision, there are no negative values. Since the rest of the transform
+ // is bitwise only, we switch now to an unsigned representation.
+ uint64_t GCD = 0;
+ for (auto &V : Values)
+ GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
+
+ // This transform can be done speculatively because it is so cheap - it results
+ // in a single rotate operation being inserted. This can only happen if the
+ // factor extracted is a power of 2.
+ // FIXME: If the GCD is an odd number we can multiply by the multiplicative
+ // inverse of GCD and then perform this transform.
+ // FIXME: It's possible that optimizing a switch on powers of two might also
+ // be beneficial - flag values are often powers of two and we could use a CLZ
+ // as the key function.
+ if (GCD <= 1 || !isPowerOf2_64(GCD))
+ // No common divisor found or too expensive to compute key function.
+ return false;
+
+ unsigned Shift = Log2_64(GCD);
+ for (auto &V : Values)
+ V = (int64_t)((uint64_t)V >> Shift);
+
+ if (!isSwitchDense(Values))
+ // Transform didn't create a dense switch.
+ return false;
+
+ // The obvious transform is to shift the switch condition right and emit a
+ // check that the condition actually divided cleanly by the GCD, i.e.
+ // (C & ((1 << Shift) - 1)) == 0,
+ // inserting a new CFG edge to handle the case where it didn't divide cleanly.
+ //
+ // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
+ // shift and puts the shifted-off bits in the uppermost bits. If any of these
+ // are nonzero then the switch condition will be very large and will hit the
+ // default case.
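+ //
+ // For example, with Base == 5 and Shift == 2 (GCD == 4), the condition 9
+ // becomes ROTR(9 - 5, 2) == 1, while a stray value such as 6 becomes
+ // ROTR(1, 2), whose shifted-off bit lands in the upper bits, producing a
+ // huge value that hits the default case.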
+
+ auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
+ Builder.SetInsertPoint(SI);
+ auto *ShiftC = ConstantInt::get(Ty, Shift);
+ auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ auto *LShr = Builder.CreateLShr(Sub, ShiftC);
+ auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
+ auto *Rot = Builder.CreateOr(LShr, Shl);
+ SI->replaceUsesOfWith(SI->getCondition(), Rot);
+
+ for (auto Case : SI->cases()) {
+ auto *Orig = Case.getCaseValue();
+ auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+ Case.setValue(
+ cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+ }
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
+ BasicBlock *BB = SI->getParent();
+
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // Remove unreachable cases.
+ if (EliminateDeadSwitchCases(SI, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ if (SwitchToSelect(SI, Builder, AC, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // The conversion from switch to lookup tables results in difficult-to-analyze
+ // code and makes pruning branches much harder. This is a problem if the
+ // switch expression itself can still be restricted as a result of inlining or
+ // CVP. Therefore, only apply this transformation during late stages of the
+ // optimization pipeline.
+ if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ if (ReduceSwitchRange(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i;
+ --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ return Changed;
+}
+
+/// Given a block with only a single landing pad and an unconditional branch,
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
+ }
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+ // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSet<BasicBlock *, 16> Preds;
+ Preds.insert(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
+ "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
+ Instruction &Inst = *I;
+ I++;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst.eraseFromParent();
+ }
+
+ SmallSet<BasicBlock *, 16> Succs;
+ Succs.insert(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : Succs) {
+ Succ->removePredecessor(BB);
+ }
+
+ IRBuilder<> Builder(BI);
+ Builder.CreateUnreachable();
+ BI->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
+ IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ return true;
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ // If LoopHeaders is provided, check whether the block or its successor is a
+ // loop header. (This is for early invocations before loop simplify and
+ // vectorization, to keep canonical loop forms for nested loops; these blocks
+ // can be eliminated when the pass is invoked later in the back-end.)
+ bool NeedCanonicalLoop =
+ !LateSimplifyCFG &&
+ (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() &&
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
+ BonusInstThreshold, AC))
+ return true;
+ }
+
+ // See if we can merge an empty landing pad block with another which is
+ // equivalent.
+ if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {
+ }
+ if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
+ return true;
+ }
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and our successor, fold the comparison into the
+ // predecessor and use logical operations to update the incoming value
+ // for PHI nodes in common successor.
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ return false;
+}
+
+static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
+ BasicBlock *PredPred = nullptr;
+ for (auto *P : predecessors(BB)) {
+ BasicBlock *PPred = P->getSinglePredecessor();
+ if (!PPred || (PredPred && PredPred != PPred))
+ return nullptr;
+ PredPred = PPred;
+ }
+ return PredPred;
+}
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ // Ignore dbg intrinsics.
+ BasicBlock::iterator I = BB->begin();
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())) {
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ }
+
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, Builder, DL))
+ return true;
+
+ // If this basic block has a single dominating predecessor block and the
+ // dominating block's condition implies BI's condition, we know the direction
+ // of the BI branch.
+ if (BasicBlock *Dom = BB->getSinglePredecessor()) {
+ auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
+ if (PBI && PBI->isConditional() &&
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
+ bool CondIsFalse = PBI->getSuccessor(1) == BB;
+ Optional<bool> Implication = isImpliedCondition(
+ PBI->getCondition(), BI->getCondition(), DL, CondIsFalse);
+ if (Implication) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *CI = *Implication
+ ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(CI);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ }
+ }
+
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the comparison into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor()) {
+ if (BI->getSuccessor(1)->getSinglePredecessor()) {
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to Successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to Successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, DL, AC))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ // Look for diamond patterns.
+ if (MergeCondStores)
+ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (mergeConditionalStores(PBI, BI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
+ return false;
+}
+
+/// Check if passing a value to an instruction will cause undefined behavior.
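+/// For example, feeding a null pointer into the pointer operand of a
+/// non-volatile load or store is undefined behavior, so a phi edge that
+/// would pass such a value can be removed.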
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (I->use_empty())
+ return false;
+
+ if (C->isNullValue() || isa<UndefValue>(C)) {
+ // Only look at the first use to avoid hurting compile time with long uselists
+ User *Use = *I->user_begin();
+
+ // Now make sure that there are no instructions in between that can alter
+ // control flow (e.g. calls).
+ for (BasicBlock::iterator
+ i = ++BasicBlock::iterator(I),
+ UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
+ i != UI; ++i)
+ if (i == I->getParent()->end() || i->mayHaveSideEffects())
+ return false;
+
+ // Look through GEPs. A load from a GEP derived from NULL is still undefined
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+ if (GEP->getPointerOperand() == I)
+ return passingValueIsAlwaysUndefined(V, GEP);
+
+ // Look through bitcasts.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+ return passingValueIsAlwaysUndefined(V, BC);
+
+ // Load from null is undefined.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
+
+ // Store to null is undefined.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 &&
+ SI->getPointerOperand() == I;
+
+ // A call to null is undefined.
+ if (auto CS = CallSite(Use))
+ return CS.getCalledValue() == I;
+ }
+ return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. a null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+ for (BasicBlock::iterator i = BB->begin();
+ PHINode *PHI = dyn_cast<PHINode>(i); ++i)
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
+ TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+ IRBuilder<> Builder(T);
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ BB->removePredecessor(PHI->getIncomingBlock(i));
+ // Turn unconditional branches into unreachables and remove the dead
+ // destination from conditional branches.
+ if (BI->isUnconditional())
+ Builder.CreateUnreachable();
+ else
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
+ : BI->getSuccessor(0));
+ BI->eraseFromParent();
+ return true;
+ }
+ // TODO: SwitchInst.
+ }
+
+ return false;
+}
+
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
+
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB, true);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Check for and remove branches that will always cause undefined behavior.
+ Changed |= removeUndefIntroducingPredecessor(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ IRBuilder<> Builder(BB);
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
+
+ Builder.SetInsertPoint(BB->getTerminator());
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI, Builder))
+ return true;
+ } else {
+ if (SimplifyCondBranch(BI, Builder))
+ return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI, Builder))
+ return true;
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder))
+ return true;
+ } else if (CleanupReturnInst *RI =
+ dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
+ if (SimplifyCleanupReturn(RI))
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI, Builder))
+ return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI))
+ return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI))
+ return true;
+ }
+
+ return Changed;
+}
+
+/// This function is used to do simplification of a CFG.
+/// For example, it adjusts branches to branches to eliminate the extra hop,
+/// eliminates unreachable basic blocks, and does other "peephole" optimization
+/// of the CFG. It returns true if a modification was made.
+///
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ unsigned BonusInstThreshold, AssumptionCache *AC,
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG) {
+ return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
+ BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG)
+ .run(BB);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 000000000000..6d90e6b48358
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,765 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "indvars"
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(
+ NumSimplifiedSDiv,
+ "Number of IV signed division operations converted to unsigned division");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+ /// This is a utility for simplifying induction variables
+ /// based on ScalarEvolution. It is the primary instrument of the
+ /// IndvarSimplify pass, but it may also be directly invoked to clean up after
+ /// other loop passes that preserve SCEV.
+ class SimplifyIndvar {
+ Loop *L;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
+
+ bool Changed;
+
+ public:
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
+ assert(LI && "IV simplification requires LoopInfo");
+ }
+
+ bool hasChanged() const { return Changed; }
+
+ /// Iteratively perform simplification on a worklist of users of the
+ /// specified induction variable. This is the top-level driver that applies
+ /// all simplifications to users of an IV.
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
+
+ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateOverflowIntrinsic(CallInst *CI);
+ bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned);
+ bool eliminateSDiv(BinaryOperator *SDiv);
+ bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
+ };
+}
+
+/// Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
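+///
+/// A sketch of the kind of fold this enables (IR names hypothetical),
+/// assuming SCEV can prove %iv is always a multiple of 4:
+///
+///   %add = add i32 %iv, 1
+///   %q   = lshr i32 %add, 2
+/// -->
+///   %q   = lshr i32 %iv, 2     ; the +1 never affects bits 2 and up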
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst,
+                                  Instruction *IVOperand) {
+ Value *IVSrc = nullptr;
+ unsigned OperIdx = 0;
+ const SCEV *FoldedExpr = nullptr;
+ switch (UseInst->getOpcode()) {
+ default:
+ return nullptr;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // We're only interested in the case where we know something about
+ // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) ||
+ !isa<ConstantInt>(UseInst->getOperand(1)))
+ return nullptr;
+
+ // Attempt to fold a binary operator with constant operand.
+ // e.g. ((I + 1) >> 2) => I >> 2
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
+ return nullptr;
+
+ IVSrc = IVOperand->getOperand(0);
+ // IVSrc must be the (SCEVable) IV, since the other operand is const.
+ assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+ ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+ if (UseInst->getOpcode() == Instruction::LShr) {
+ // Get a constant for the divisor. See createSCEV.
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+ if (D->getValue().uge(BitWidth))
+ return nullptr;
+
+ D = ConstantInt::get(UseInst->getContext(),
+ APInt::getOneBitSet(BitWidth, D->getZExtValue()));
+ }
+ FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ }
+  // We have something that might fold its operand. Compare SCEVs.
+ if (!SE->isSCEVable(UseInst->getType()))
+ return nullptr;
+
+ // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr)
+ return nullptr;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
+
+ UseInst->setOperand(OperIdx, IVSrc);
+ assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+ ++NumElimOperand;
+ Changed = true;
+ if (IVOperand->use_empty())
+ DeadInsts.emplace_back(IVOperand);
+ return IVSrc;
+}
+
+/// SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
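+///
+/// For example (names hypothetical), if SCEV can prove that %iv stays below
+/// %n on every iteration, the compare folds to a constant and dies:
+///
+///   %cmp = icmp ult i32 %iv, %n
+/// -->
+///   all uses of %cmp replaced by true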
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ ICmpInst::Predicate OriginalPred = Pred;
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ ICmpInst::Predicate InvariantPredicate;
+ const SCEV *InvariantLHS, *InvariantRHS;
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X)) {
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ DeadInsts.emplace_back(ICmp);
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ } else if (isa<PHINode>(IVOperand) &&
+ SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
+ InvariantLHS, InvariantRHS)) {
+
+ // Rewrite the comparison to a loop invariant comparison if it can be done
+ // cheaply, where cheaply means "we don't need to emit any new
+ // instructions".
+
+ Value *NewLHS = nullptr, *NewRHS = nullptr;
+
+ if (S == InvariantLHS || X == InvariantLHS)
+ NewLHS =
+ ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx));
+
+ if (S == InvariantRHS || X == InvariantRHS)
+ NewRHS =
+ ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx));
+
+ auto *PN = cast<PHINode>(IVOperand);
+ for (unsigned i = 0, e = PN->getNumIncomingValues();
+ i != e && (!NewLHS || !NewRHS);
+ ++i) {
+
+ // If this is a value incoming from the backedge, then it cannot be a loop
+ // invariant value (since we know that IVOperand is an induction variable).
+ if (L->contains(PN->getIncomingBlock(i)))
+ continue;
+
+ // NB! This following assert does not fundamentally have to be true, but
+ // it is true today given how SCEV analyzes induction variables.
+ // Specifically, today SCEV will *not* recognize %iv as an induction
+ // variable in the following case:
+ //
+ // define void @f(i32 %k) {
+ // entry:
+ // br i1 undef, label %r, label %l
+ //
+ // l:
+ // %k.inc.l = add i32 %k, 1
+ // br label %loop
+ //
+ // r:
+ // %k.inc.r = add i32 %k, 1
+ // br label %loop
+ //
+ // loop:
+ // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ]
+ // %iv.inc = add i32 %iv, 1
+ // br label %loop
+ // }
+ //
+ // but if it starts to, at some point, then the assertion below will have
+ // to be changed to a runtime check.
+
+ Value *Incoming = PN->getIncomingValue(i);
+
+#ifndef NDEBUG
+ if (auto *I = dyn_cast<Instruction>(Incoming))
+ assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!");
+#endif
+
+ const SCEV *IncomingS = SE->getSCEV(Incoming);
+
+ if (!NewLHS && IncomingS == InvariantLHS)
+ NewLHS = Incoming;
+ if (!NewRHS && IncomingS == InvariantRHS)
+ NewRHS = Incoming;
+ }
+
+ if (!NewLHS || !NewRHS)
+ // We could not find an existing value to replace either LHS or RHS.
+ // Generating new instructions has subtler tradeoffs, so avoid doing that
+ // for now.
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(InvariantPredicate);
+ ICmp->setOperand(0, NewLHS);
+ ICmp->setOperand(1, NewRHS);
+ } else if (ICmpInst::isSigned(OriginalPred) &&
+ SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+    // If we were unable to do anything above, all we can do is canonicalize
+ // the comparison hoping that it will open the doors for other
+ // optimizations. If we find out that we compare two non-negative values,
+ // we turn the instruction's predicate to its unsigned version. Note that
+ // we cannot rely on Pred here unless we check if we have swapped it.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
+ } else
+ return;
+
+ ++NumElimCmp;
+ Changed = true;
+}
+
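+/// Replace a signed division by an unsigned one when SCEV can prove both
+/// operands are non-negative, since the two then agree. A sketch of the
+/// rewrite (IR names hypothetical):
+///
+///   %d = sdiv i32 %iv, %n      ; %iv and %n provably non-negative
+/// -->
+///   %d.udiv = udiv i32 %iv, %n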
+bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
+  // Get the SCEVs for the SDiv operands.
+ auto *N = SE->getSCEV(SDiv->getOperand(0));
+ auto *D = SE->getSCEV(SDiv->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *L = LI->getLoopFor(SDiv->getParent());
+ N = SE->getSCEVAtScope(N, L);
+ D = SE->getSCEVAtScope(D, L);
+
+ // Replace sdiv by udiv if both of the operands are non-negative
+ if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
+ auto *UDiv = BinaryOperator::Create(
+ BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
+ SDiv->getName() + ".udiv", SDiv);
+ UDiv->setIsExact(SDiv->isExact());
+ SDiv->replaceAllUsesWith(UDiv);
+ DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ ++NumSimplifiedSDiv;
+ Changed = true;
+ DeadInsts.push_back(SDiv);
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyIVUsers helper for eliminating useless
+/// remainder operations operating on an induction variable.
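+///
+/// For instance (names hypothetical), if SCEV proves 0 <= %iv < %n:
+///
+///   %r = urem i32 %iv, %n
+/// -->
+///   all uses of %r replaced by %iv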
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+  // Get the SCEVs for the Rem operands.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n).
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
+ const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType()));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return;
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return;
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1));
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.emplace_back(Rem);
+}
+
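+/// Try to prove that a with.overflow intrinsic cannot actually overflow by
+/// redoing the operation in twice the bit width and comparing SCEVs: if
+/// ext(a op b) == ext(a) op ext(b), no wrap occurs. A sketch of the rewrite
+/// (IR names hypothetical):
+///
+///   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %iv, i32 1)
+/// -->
+///   %s.val = add nsw i32 %iv, 1   ; uses of the i1 result become false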
+bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
+ auto *F = CI->getCalledFunction();
+ if (!F)
+ return false;
+
+ typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
+ const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
+ typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
+ const SCEV *, Type *, unsigned);
+
+ OperationFunctionTy Operation;
+ ExtensionFunctionTy Extension;
+
+ Instruction::BinaryOps RawOp;
+
+ // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
+ // have nuw.
+ bool NoSignedOverflow;
+
+ switch (F->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::sadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::uadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = false;
+ break;
+
+ case Intrinsic::ssub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::usub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = false;
+ break;
+ }
+
+ const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
+ const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+
+ auto *NarrowTy = cast<IntegerType>(LHS->getType());
+ auto *WideTy =
+ IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
+
+ const SCEV *A =
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0),
+ WideTy, 0);
+ const SCEV *B =
+ (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
+ (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
+
+ if (A != B)
+ return false;
+
+ // Proved no overflow, nuke the overflow check and, if possible, the overflow
+ // intrinsic as well.
+
+ BinaryOperator *NewResult = BinaryOperator::Create(
+ RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+
+ if (NoSignedOverflow)
+ NewResult->setHasNoSignedWrap(true);
+ else
+ NewResult->setHasNoUnsignedWrap(true);
+
+ SmallVector<ExtractValueInst *, 4> ToDelete;
+
+ for (auto *U : CI->users()) {
+ if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ if (EVI->getIndices()[0] == 1)
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+ else {
+ assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+ EVI->replaceAllUsesWith(NewResult);
+ }
+ ToDelete.push_back(EVI);
+ }
+ }
+
+ for (auto *EVI : ToDelete)
+ EVI->eraseFromParent();
+
+ if (CI->use_empty())
+ CI->eraseFromParent();
+
+ return true;
+}
+
+/// Eliminate an operation that consumes a simple IV and has no observable
+/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
+/// but UseInst may not be.
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ eliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSRem = Bin->getOpcode() == Instruction::SRem;
+ if (IsSRem || Bin->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Bin, IVOperand, IsSRem);
+ return true;
+ }
+
+ if (Bin->getOpcode() == Instruction::SDiv)
+ return eliminateSDiv(Bin);
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(UseInst))
+ if (eliminateOverflowIntrinsic(CI))
+ return true;
+
+ if (eliminateIdentitySCEV(UseInst, IVOperand))
+ return true;
+
+ return false;
+}
+
+/// Eliminate any operation that SCEV can prove is an identity function.
+bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
+ // dominator tree, even if X is an operand to Y. For instance, in
+ //
+ // %iv = phi i32 {0,+,1}
+ // br %cond, label %left, label %merge
+ //
+ // left:
+ // %X = add i32 %iv, 0
+ // br label %merge
+ //
+ // merge:
+ // %M = phi (%X, %iv)
+ //
+ // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
+ // %M.replaceAllUsesWith(%X) would be incorrect.
+
+ if (isa<PHINode>(UseInst))
+ // If UseInst is not a PHI node then we know that IVOperand dominates
+ // UseInst directly from the legality of SSA.
+ if (!DT || !DT->dominates(IVOperand, UseInst))
+ return false;
+
+ if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.emplace_back(UseInst);
+ return true;
+}
+
+/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
+/// unsigned-overflow. Returns true if anything changed, false otherwise.
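+///
+/// The proof is by widening (a sketch): if, as SCEV expressions,
+///   zext(%iv + %step) to i64  ==  zext(%iv) + zext(%step) to i64
+/// then the i32 add cannot wrap unsigned and may be marked nuw; the signed
+/// case works the same way with sext.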
+bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
+ Value *IVOperand) {
+
+ // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
+ return false;
+
+ const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags, unsigned);
+ switch (BO->getOpcode()) {
+ default:
+ return false;
+
+ case Instruction::Add:
+ GetExprForBO = &ScalarEvolution::getAddExpr;
+ break;
+
+ case Instruction::Sub:
+ GetExprForBO = &ScalarEvolution::getMinusSCEV;
+ break;
+
+ case Instruction::Mul:
+ GetExprForBO = &ScalarEvolution::getMulExpr;
+ break;
+ }
+
+ unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
+ const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
+
+ bool Changed = false;
+
+ if (!BO->hasNoUnsignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap, 0u);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
+ }
+ }
+
+ if (!BO->hasNoSignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap, 0u);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
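+///
+/// For example (names hypothetical), if SCEV shows %iv >= 4 throughout:
+///
+///   %t = shl i32 %x, %iv
+///   %s = lshr i32 %t, 4        ; the low 4 bits of %t are known zero
+/// -->
+///   %s = lshr exact i32 %t, 4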
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+ Value *IVOperand) {
+ using namespace llvm::PatternMatch;
+
+ if (BO->getOpcode() == Instruction::Shl) {
+ bool Changed = false;
+ ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+ for (auto *U : BO->users()) {
+ const APInt *C;
+ if (match(U,
+ m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+ match(U,
+ m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+ BinaryOperator *Shr = cast<BinaryOperator>(U);
+ if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+ Shr->setIsExact(true);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+ }
+
+ return false;
+}
+
+/// Add all uses of Def to the current IV's worklist.
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (User *U : Def->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (UI != Def && Simplified.insert(UI).second)
+ SimpleIVUsers.push_back(std::make_pair(UI, Def));
+ }
+}
+
+/// Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
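+/// As a small worked example (IR names hypothetical), given
+///
+///   %iv  = phi i32 [ 0, %ph ], [ %inc, %loop ]
+///   %cmp = icmp slt i32 %iv, 100
+///   %inc = add i32 %iv, 1
+///
+/// the worklist starts with (%cmp, %iv) and (%inc, %iv); %cmp may fold to a
+/// constant, while visiting %inc may strengthen it with nuw/nsw and push its
+/// own users in turn.
+///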
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+ if (!SE->isSCEVable(CurrIV->getType()))
+ return;
+
+ // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+  // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ std::pair<Instruction*, Instruction*> UseOper =
+ SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ Instruction *IVOperand = UseOper.second;
+ for (unsigned N = 0; IVOperand; ++N) {
+ assert(N <= Simplified.size() && "runaway iteration");
+
+ Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+ if (!NewOper)
+ break; // done folding
+ IVOperand = dyn_cast<Instruction>(NewOper);
+ }
+ if (!IVOperand)
+ continue;
+
+ if (eliminateIVUser(UseOper.first, IVOperand)) {
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ continue;
+ }
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) {
+ if ((isa<OverflowingBinaryOperator>(BO) &&
+ strengthenOverflowingOperation(BO, IVOperand)) ||
+ (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
+ // re-queue uses of the now modified binary operator and fall
+ // through to the checks that remain.
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ }
+ }
+
+ CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+ if (V && Cast) {
+ V->visitCast(Cast);
+ continue;
+ }
+ if (isSimpleIVUser(UseOper.first, L, SE)) {
+ pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
+ }
+ }
+}
+
+namespace llvm {
+
+void IVVisitor::anchor() { }
+
+/// Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence.
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
+ IVVisitor *V) {
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
+ SIV.simplifyUsers(CurrIV, V);
+ return SIV.hasChanged();
+}
+
+/// Simplify users of induction variables within this
+/// loop. This does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
+ bool Changed = false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
+ }
+ return Changed;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..2ea15f65cef9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,152 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
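+// A typical way to exercise this pass (assuming a standard LLVM build) is
+// via opt, using the pass name registered below:
+//
+//   opt -instsimplify -S input.ll
+//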
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyInstructions.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instsimplify"
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+static bool runImpl(Function &F, const SimplifyQuery &SQ,
+ OptimizationRemarkEmitter *ORE) {
+ SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+      // Here be subtlety: the iterator must be incremented before the loop
+      // body because the body may erase the current instruction, which
+      // would invalidate it; hence a range-for loop won't work here.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = &*BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty()) {
+ if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ }
+ if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
+ // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
+ // instruction, so simply incrementing the iterator does not work.
+ // When instructions get deleted re-iterate instead.
+ BI = BB->begin();
+ BE = BB->end();
+ Changed = true;
+ }
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+}
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+
+    /// runOnFunction - Remove instructions that can be simplified away.
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ const DominatorTree *DT =
+ &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, TLI, DT, AC);
+ return runImpl(F, SQ, ORE);
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
+
+PreservedAnalyses InstSimplifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
+ bool Changed = runImpl(F, SQ, &ORE);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 000000000000..77c0a41929ac
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,2440 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the library call simplifier. It is a utility used
+// primarily by instcombine to replace calls to well-known library functions
+// (such as strlen, memcpy or pow) with simpler, equivalent forms when the
+// call's semantics allow it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+static cl::opt<bool>
+ EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static bool ignoreCallingConv(LibFunc Func) {
+ return Func == LibFunc_abs || Func == LibFunc_labs ||
+ Func == LibFunc_llabs || Func == LibFunc_strlen;
+}
+
+static bool isCallingConvCCompatible(CallInst *CI) {
+  switch (CI->getCallingConv()) {
+ default:
+ return false;
+ case llvm::CallingConv::C:
+ return true;
+ case llvm::CallingConv::ARM_APCS:
+ case llvm::CallingConv::ARM_AAPCS:
+ case llvm::CallingConv::ARM_AAPCS_VFP: {
+
+ // The iOS ABI diverges from the standard in some cases, so for now don't
+ // try to simplify those calls.
+ if (Triple(CI->getModule()->getTargetTriple()).isiOS())
+ return false;
+
+ auto *FuncTy = CI->getFunctionType();
+
+ if (!FuncTy->getReturnType()->isPointerTy() &&
+ !FuncTy->getReturnType()->isIntegerTy() &&
+ !FuncTy->getReturnType()->isVoidTy())
+ return false;
+
+ for (auto Param : FuncTy->params()) {
+ if (!Param->isPointerTy() && !Param->isIntegerTy())
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Return true if V is only used in equality comparisons with With.
+static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+ return any_of(CI->operands(), [](const Use &OI) {
+ return OI->getType()->isFloatingPointTy();
+ });
+}
+
+/// \brief Check whether the overloaded unary floating point function
+/// corresponding to \a Ty is available.
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ return TLI->has(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->has(DoubleFn);
+ default:
+ return TLI->has(LongDoubleFn);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
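+// A representative strcat fold (a sketch; names hypothetical): when the
+// source length is a known constant, e.g.
+//
+//   strcat(dst, "abc");
+//
+// the call becomes a strlen on dst followed by a 4-byte memcpy (the string
+// plus its nul), avoiding the library call's rescan of the source.
+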
+Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+}
+
+Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = emitStrLen(Dst, B, DL, TLI);
+ if (!DstLen)
+ return nullptr;
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+  // concatenation for us. Make a memcpy (align 1) that also copies the nul.
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
+ 1);
+ return Dst;
+}
+
+Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
+ // Extract some information from the instruction.
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
+
+ // We don't do anything if length is not constant.
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0)
+ return nullptr;
+ --SrcLen; // Unbias length.
+
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0)
+ return Dst;
+
+ // We don't optimize this case.
+ if (Len < SrcLen)
+ return nullptr;
+
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further.
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+}
+
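+// Sketches of the strchr folds below (values hypothetical): with a constant
+// string, strchr("hello", 'l') constant-folds to a GEP at offset 2; with a
+// variable character but s of known length n (n counts the nul), strchr(s, c)
+// becomes memchr(s, c, n), bounding the scan for later passes.
+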
+Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ Value *SrcStr = CI->getArgOperand(0);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!CharC) {
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ return nullptr;
+
+ return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
+ B, DL, TLI);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
+ "strchr");
+ return nullptr;
+ }
+
+ // Compute the offset, make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = (0xFF & CharC->getSExtValue()) == 0
+ ? Str.size()
+ : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
+}
+
+Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return nullptr;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (CharC->isZero())
+ return emitStrChr(SrcStr, '\0', B, TLI);
+ return nullptr;
+ }
+
+ // Compute the offset.
+ size_t I = (0xFF & CharC->getSExtValue()) == 0
+ ? Str.size()
+ : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
+}
+
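+// One fold below that is easy to miss (values hypothetical): when both
+// string lengths are known, e.g. strcmp(p, "x") with the length of p known,
+// the call becomes memcmp(p, "x", 2), which is simpler to analyze and can
+// fold further.
+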
+Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(
+ B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ return emitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ std::min(Len1, Len2)),
+ B, DL, TLI);
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return nullptr;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+  // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(
+ B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ // We have enough information to now generate the memcpy call to do the
+  // copy for us. Make a memcpy (align 1) that also copies the nul byte.
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
+ return Dst;
+}
+
+Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
+ }
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ Type *PT = Callee->getFunctionType()->getParamType(0);
+ Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
+ Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
+ ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+
+ // We have enough information to now generate the memcpy call to do the
+  // copy for us. Make a memcpy (align 1) that also copies the nul byte.
+ B.CreateMemCpy(Dst, Src, LenV, 1);
+ return DstEnd;
+}
+
+Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0)
+ return nullptr;
+ --SrcLen;
+
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+ return Dst;
+ }
+
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
+
+ if (Len == 0)
+ return Dst; // strncpy(x, y, 0) -> x
+
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen + 1)
+ return nullptr;
+
+ Type *PT = Callee->getFunctionType()->getParamType(0);
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
+
+ return Dst;
+}
+
+Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
+ unsigned CharSize) {
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src, CharSize))
+ return ConstantInt::get(CI->getType(), Len - 1);
+
+ // If s is a constant pointer pointing to a string literal, we can fold
+ // strlen(s + x) to strlen(s) - x, when x is known to be in the range
+ // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
+ // We only try to simplify strlen when the pointer s points to an array
+ // of i8. Otherwise, we would need to scale the offset x before doing the
+ // subtraction. This will make the optimization more complex, and it's not
+ // very useful because calling strlen for a pointer of other types is
+ // very uncommon.
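+  // For example (a sketch): with s = "abcd" (NullTermIdx == 4) and an offset
+  // %x provably in [0, 4], strlen(s + %x) folds to (4 - %x).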
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
+ if (!isGEPBasedOnPointerToString(GEP, CharSize))
+ return nullptr;
+
+ ConstantDataArraySlice Slice;
+ if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
+ uint64_t NullTermIdx;
+ if (Slice.Array == nullptr) {
+ NullTermIdx = 0;
+ } else {
+ NullTermIdx = ~((uint64_t)0);
+ for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
+ if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
+ NullTermIdx = I;
+ break;
+ }
+ }
+ // If the string does not have '\0', leave it to strlen to compute
+ // its length.
+ if (NullTermIdx == ~((uint64_t)0))
+ return nullptr;
+ }
+
+ Value *Offset = GEP->getOperand(2);
+ KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
+ Known.Zero.flipAllBits();
+ uint64_t ArrSize =
+ cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
+
+ // KnownZero's bits are flipped, so zeros in KnownZero now represent
+      // bits known to be zeros in Offset, and ones in KnownZero represent
+ // bits unknown in Offset. Therefore, Offset is known to be in range
+ // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+ // unsigned-less-than NullTermIdx.
+ //
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // is a pointer to an object whose memory extent is NullTermIdx+1.
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
+ (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
+ NullTermIdx == ArrSize - 1)) {
+ Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ Offset);
+ }
+ }
+
+ return nullptr;
+ }
+
+ // strlen(x?"foo":"bars") --> x ? 3 : 4
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
+ if (LenTrue && LenFalse) {
+ Function *Caller = CI->getParent()->getParent();
+ emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
+ SI->getDebugLoc(),
+ "folded strlen(select) to select of constants");
+ return B.CreateSelect(SI->getCondition(),
+ ConstantInt::get(CI->getType(), LenTrue - 1),
+ ConstantInt::get(CI->getType(), LenFalse - 1));
+ }
+ }
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (isOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ return optimizeStringLength(CI, B, 8);
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+ Module &M = *CI->getParent()->getParent()->getParent();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
+Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> nullptr
+ // strpbrk("", s) -> nullptr
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == StringRef::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
+ "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (HasS2 && S2.size() == 1)
+ return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addParamAttr(0, Attribute::NoCapture);
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
+
+ // strcspn(s, "") -> strlen(s)
+ if (HasS2 && S2.empty())
+ return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
+ if (!StrLen)
+ return nullptr;
+ Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, DL, TLI);
+ if (!StrNCmp)
+ return nullptr;
+ for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp =
+ B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
+ replaceAllUsesWith(Old, Cmp);
+ }
+ return CI;
+ }
+
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ size_t Offset = SearchStr.find(ToFindStr);
+
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
+ return Constant::getNullValue(CI->getType());
+
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = castToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
+
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+
+ // memchr(x, y, 0) -> null
+ if (LenC && LenC->isZero())
+ return Constant::getNullValue(CI->getType());
+
+ // From now on we need at least constant length and string.
+ StringRef Str;
+ if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // Truncate the string to LenC. If Str is smaller than LenC we will still only
+ // scan the string, as reading past the end of it is undefined and we can just
+ // return null if we don't find the char.
+ Str = Str.substr(0, LenC->getZExtValue());
+
+ // If the char is variable but the input str and length are not we can turn
+ // this memchr call into a simple bit field test. Of course this only works
+ // when the return value is only checked against null.
+ //
+ // It would be really nice to reuse switch lowering here but we can't change
+ // the CFG at this point.
+ //
+ // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // after bounds check.
+ if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
+
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64 bit architecture this prevents us from using the
+ // interesting range of alpha ascii chars. We could do better by emitting
+ // two bitfields or shifting the range by 64 if no lower chars are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
+
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
+
+ // First check that the bit field access is within bounds.
+ Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
+
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
+ }
+
+ // Check if all arguments are constants. If so, we can constant fold.
+ if (!CharC)
+ return nullptr;
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue() & 0xFF);
+ if (I == StringRef::npos) // Didn't find the char. memchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // memchr(s+n,c,l) -> gep(s+n+i,c)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
+}
+
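+// Sketches of the memcmp folds below (names hypothetical): memcmp(a, b, 1)
+// becomes a subtraction of the two zero-extended bytes; memcmp(a, b, 4)
+// that is only tested against zero becomes a single i32 load-and-compare
+// when both pointers are sufficiently aligned for the target.
+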
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC)
+ return nullptr;
+
+ uint64_t Len = LenC->getZExtValue();
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+ IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+ unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+ if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+ getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+ Value *LHSV =
+ B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV =
+ B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+ return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+ }
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.size() || Len > RHSStr.size())
+ return nullptr;
+ // Fold the memcmp and normalize the result. This way we get consistent
+ // results across multiple platforms.
+ uint64_t Ret = 0;
+ int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ if (Cmp < 0)
+ Ret = -1;
+ else if (Cmp > 0)
+ Ret = 1;
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
+
+Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
+
+// TODO: Does this belong in BuildLibCalls or should all of those similar
+// functions be moved here?
+static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
+ IRBuilder<> &B, const TargetLibraryInfo &TLI) {
+ LibFunc Func;
+ if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func))
+ return nullptr;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
+ Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
+ PtrType, PtrType);
+ CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc");
+
+ if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
+static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
+ const TargetLibraryInfo &TLI) {
+ // This has to be a memset of zeros (bzero).
+ auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
+ if (!FillValue || FillValue->getZExtValue() != 0)
+ return nullptr;
+
+ // TODO: We should handle the case where the malloc has more than one use.
+ // This is necessary to optimize common patterns such as when the result of
+ // the malloc is checked against null or when a memset intrinsic is used in
+ // place of a memset library call.
+ auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
+ if (!Malloc || !Malloc->hasOneUse())
+ return nullptr;
+
+ // Is the inner call really malloc()?
+ Function *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return nullptr;
+
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return nullptr;
+
+ // The memset must cover the same number of bytes that are malloc'd.
+ if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
+ return nullptr;
+
+ // Replace the malloc with a calloc. We need the data layout to know what the
+ // actual size of a 'size_t' parameter is.
+ B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
+ const DataLayout &DL = Malloc->getModule()->getDataLayout();
+ IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
+ Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
+ Malloc->getArgOperand(0), Malloc->getAttributes(),
+ B, TLI);
+ if (!Calloc)
+ return nullptr;
+
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+
+ return Calloc;
+}
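+
+// For example (illustration only): the C sequence
+//   p = malloc(n); memset(p, 0, n);
+// becomes
+//   p = calloc(1, n);
+// letting the allocator hand back already-zeroed memory instead of touching
+// it twice.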
+
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+ if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
+ return Calloc;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+/// Return a variant of Val with float type.
+/// Currently this works in two cases: if Val is an FPExtension of a float
+/// value to something bigger, simply return the operand; if Val is a
+/// ConstantFP that can be converted to a float ConstantFP without loss of
+/// precision, do so.
+static Value *valueHasFloatPrecision(Value *Val) {
+ if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
+ Value *Op = Cast->getOperand(0);
+ if (Op->getType()->isFloatTy())
+ return Op;
+ }
+ if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
+ APFloat F = Const->getValueAPF();
+ bool losesInfo;
+ (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(Const->getContext(), F);
+ }
+ return nullptr;
+}
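+
+// E.g. (illustration only): for 'fpext float %x to double' this returns %x,
+// and for the double constant 1.0 it returns a float 1.0; the double
+// constant 0.1 is rejected because it is not exactly representable as a
+// float.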
+
+/// Shrink double -> float for unary functions like 'floor'.
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool CheckRetType) {
+ Function *Callee = CI->getCalledFunction();
+ // We know this libcall has a valid prototype, but we don't know which.
+ if (!CI->getType()->isDoubleTy())
+ return nullptr;
+
+ if (CheckRetType) {
+    // Check if all the uses of a function like 'sin' are converted to float.
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
+ if (V == nullptr)
+ return nullptr;
+
+ // If call isn't an intrinsic, check that it isn't within a function with the
+ // same name as the float version of this call.
+ //
+ // e.g. inline float expf(float val) { return (float) exp((double) val); }
+ //
+ // A similar such definition exists in the MinGW-w64 math.h header file which
+ // when compiled with -O2 -ffast-math causes the generation of infinite loops
+ // where expf is called.
+ if (!Callee->isIntrinsic()) {
+ const Function *F = CI->getFunction();
+ StringRef FName = F->getName();
+ StringRef CalleeName = Callee->getName();
+ if ((FName.size() == (CalleeName.size() + 1)) &&
+ (FName.back() == 'f') &&
+ FName.startswith(CalleeName))
+ return nullptr;
+ }
+
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ if (Callee->isIntrinsic()) {
+ Module *M = CI->getModule();
+ Intrinsic::ID IID = Callee->getIntrinsicID();
+ Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ V = B.CreateCall(F, V);
+ } else {
+ // The call is a library call rather than an intrinsic.
+ V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ }
+
+ return B.CreateFPExt(V, B.getDoubleTy());
+}
+
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
+/// Shrink double -> float for binary functions like 'fmin/fmax'.
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // We know this libcall has a valid prototype, but we don't know which.
+ if (!CI->getType()->isDoubleTy())
+ return nullptr;
+
+ // If this is something like 'fmin((double)floatval1, (double)floatval2)',
+ // or fmin(1.0, (double)floatval), then we convert it to fminf.
+ Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0));
+ if (V1 == nullptr)
+ return nullptr;
+ Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1));
+ if (V2 == nullptr)
+ return nullptr;
+
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // fmin((double)floatval1, (double)floatval2)
+ // -> (double)fminf(floatval1, floatval2)
+ // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
+ Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+}
+
+Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return Ret;
+}
+
+static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
+ // Multiplications calculated using Addition Chains.
+ // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
+
+ assert(Exp != 0 && "Incorrect exponent 0 not handled");
+
+ if (InnerChain[Exp])
+ return InnerChain[Exp];
+
+ static const unsigned AddChain[33][2] = {
+ {0, 0}, // Unused.
+ {0, 0}, // Unused (base case = pow1).
+ {1, 1}, // Unused (pre-computed).
+ {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
+ {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
+ {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
+ {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
+ {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
+ };
+
+ InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
+ getPow(InnerChain, AddChain[Exp][1], B));
+ return InnerChain[Exp];
+}
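+
+// Worked example (illustration only): for Exp == 11 the table gives
+// 11 = 1 + 10, 10 = 5 + 5, 5 = 2 + 3 and 3 = 1 + 2, so with x and x*x
+// already memoized the chain needs four more fmuls (x^3, x^5, x^10, x^11),
+// five in total, versus ten for naive repeated multiplication.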
+
+Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+
+ // pow(1.0, x) -> 1.0
+ if (match(Op1, m_SpecificFP(1.0)))
+ return Op1;
+ // pow(2.0, x) -> llvm.exp2(x)
+ if (match(Op1, m_SpecificFP(2.0))) {
+ Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2,
+ CI->getType());
+ return B.CreateCall(Exp2, Op2, "exp2");
+ }
+
+  // There's no llvm.exp10 intrinsic yet, but maybe some day there will
+  // be one.
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ // pow(10.0, x) -> exp10(x)
+ if (Op1C->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
+ Callee->getAttributes());
+ }
+
+ // pow(exp(x), y) -> exp(x * y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // We enable these only with fast-math. Besides rounding differences, the
+ // transformation changes overflow and underflow behavior quite dramatically.
+ // Example: x = 1000, y = 0.001.
+ // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
+ LibFunc Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+ Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
+ return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
+ OpCCallee->getAttributes());
+ }
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (!Op2C)
+ return Ret;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(-0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
+ // If -ffast-math:
+ // pow(x, -0.5) -> 1.0 / sqrt(x)
+ if (CI->hasUnsafeAlgebra()) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // TODO: If the pow call is an intrinsic, we should lower to the sqrt
+ // intrinsic, so we match errno semantics. We also should check that the
+ // target can in fact lower the sqrt intrinsic -- we currently have no way
+ // to ask this question other than asking whether the target has a sqrt
+ // libcall, which is a sufficient but not necessary condition.
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
+ Callee->getAttributes());
+
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip");
+ }
+ }
+
+ if (Op2C->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
+
+ // In -ffast-math, pow(x, 0.5) -> sqrt(x).
+ if (CI->hasUnsafeAlgebra()) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
+ return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
+ Callee->getAttributes());
+ }
+
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinity correctly.
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+
+ Module *M = Callee->getParent();
+ Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
+ CI->getType());
+ Value *FAbs = B.CreateCall(FabsF, Sqrt);
+
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+
+ // In -ffast-math, generate repeated fmul instead of generating pow(x, n).
+ if (CI->hasUnsafeAlgebra()) {
+ APFloat V = abs(Op2C->getValueAPF());
+ // We limit to a max of 7 fmul(s). Thus max exponent is 32.
+ // This transformation applies to integer exponents only.
+ if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan ||
+ !V.isInteger())
+ return nullptr;
+
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Op1;
+ InnerChain[2] = B.CreateFMul(Op1, Op1);
+
+ // We cannot readily convert a non-double type (like float) to a double.
+ // So we first convert V to something which could be converted to double.
+ bool ignored;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored);
+
+ Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
+ // For negative exponents simply compute the reciprocal.
+ if (Op2C->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+ return FMul;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ LibFunc LdExp = LibFunc_ldexpl;
+ if (Op->getType()->isFloatTy())
+ LdExp = LibFunc_ldexpf;
+ else if (Op->getType()->isDoubleTy())
+ LdExp = LibFunc_ldexp;
+
+ if (TLI->has(LdExp)) {
+ Value *LdExpArg = nullptr;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
+
+ if (LdExpArg) {
+ Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = CI->getModule();
+ Value *NewCallee =
+ M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
+ Op->getType(), B.getInt32Ty());
+ CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ }
+ return Ret;
+}
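+
+// For instance (illustration only): 'exp2((double)i)' where i is an i16
+// becomes 'ldexp(1.0, (i32)i)', which computes 2^i exactly by scaling the
+// exponent field of 1.0.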
+
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ StringRef Name = Callee->getName();
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
+ return Ret;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ if (CI->hasUnsafeAlgebra()) {
+ // Unsafe algebra sets all fast-math-flags to true.
+ FMF.setUnsafeAlgebra();
+ } else {
+ // At a minimum, no-nans-fp-math must be true.
+ if (!CI->hasNoNaNs())
+ return nullptr;
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+    // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
+ FMF.setNoSignedZeros();
+ FMF.setNoNaNs();
+ }
+ B.setFastMathFlags(FMF);
+
+ // We have a relaxed floating-point environment. We can ignore NaN-handling
+ // and transform to a compare and select. We do not have to consider errno or
+ // exceptions, because fmin/fmax do not have those.
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Cmp = Callee->getName().startswith("fmin") ?
+ B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+ return B.CreateSelect(Cmp, Op0, Op1);
+}
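+
+// E.g. (illustration only), with nnan and nsz set, fmin(x, y) lowers to
+//   %cmp = fcmp olt double %x, %y
+//   %min = select i1 %cmp, double %x, double %y
+// This would be wrong for a NaN operand (fcmp olt is false, so a NaN y
+// could be returned), which is why the flags are checked above.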
+
+Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ if (!CI->hasUnsafeAlgebra())
+ return Ret;
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+
+ // The earlier call must also be unsafe in order to do these transforms.
+ if (!OpC || !OpC->hasUnsafeAlgebra())
+ return Ret;
+
+ // log(pow(x,y)) -> y*log(x)
+ // This is only applicable to log, log2, log10.
+ if (Name != "log" && Name != "log2" && Name != "log10")
+ return Ret;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ B.setFastMathFlags(FMF);
+
+ LibFunc Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
+ return B.CreateFMul(OpC->getArgOperand(1),
+ emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
+ Callee->getAttributes()), "mul");
+
+ // log(exp2(y)) -> y*log(2)
+ if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
+ TLI->has(Func) && Func == LibFunc_exp2)
+ return B.CreateFMul(
+ OpC->getArgOperand(0),
+ emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
+ Callee->getName(), B, Callee->getAttributes()),
+ "logmul");
+ return Ret;
+}
+
+Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+  // TODO: Once we have a way (other than checking for the existence of the
+ // libcall) to tell whether our target can lower @llvm.sqrt, relax the
+ // condition below.
+ if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ if (!CI->hasUnsafeAlgebra())
+ return Ret;
+
+ Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+ if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return Ret;
+
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1 &&
+ cast<Instruction>(Op0)->hasUnsafeAlgebra()) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
+ }
+ }
+ }
+ if (!RepeatOp)
+ return Ret;
+
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(I->getFastMathFlags());
+
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Module *M = Callee->getParent();
+ Type *ArgType = I->getType();
+ Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return B.CreateFMul(FabsCall, SqrtCall);
+ }
+ return FabsCall;
+}
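+
+// Numeric sanity check for the folds above (illustration only): with x = -3
+// and y = 4, sqrt((x * x) * y) = sqrt(36) = 6 and fabs(-3) * sqrt(4) =
+// 3 * 2 = 6 as well; a plain sqrt(x) * sqrt(y) rewrite would have produced
+// NaN because sqrt(-3) is undefined.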
+
+// TODO: Generalize to handle any trig function and its inverse.
+Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ StringRef Name = Callee->getName();
+ if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ Value *Op1 = CI->getArgOperand(0);
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (!OpC)
+ return Ret;
+
+ // Both calls must allow unsafe optimizations in order to remove them.
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra())
+ return Ret;
+
+ // tan(atan(x)) -> x
+ // tanf(atanf(x)) -> x
+ // tanl(atanl(x)) -> x
+ LibFunc Func;
+ Function *F = OpC->getCalledFunction();
+ if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ ((Func == LibFunc_atan && Callee->getName() == "tan") ||
+ (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc_atanl && Callee->getName() == "tanl")))
+ Ret = OpC->getArgOperand(0);
+ return Ret;
+}
+
+static bool isTrigLibCall(CallInst *CI) {
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ // We already checked the prototype.
+ return CI->hasFnAttr(Attribute::NoUnwind) &&
+ CI->hasFnAttr(Attribute::ReadNone);
+}
+
+static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospif_stret";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+}
+
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
+ // Make sure the prototype is as expected, otherwise the rest of the
+ // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return nullptr;
+
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
+
+ bool IsFloat = Arg->getType()->isFloatTy();
+
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ Function *F = CI->getFunction();
+ for (User *U : Arg->users())
+ classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
+
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ return nullptr;
+
+ Value *Sin, *Cos, *SinCos;
+ insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
+
+ auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
+ Value *Res) {
+ for (CallInst *C : Calls)
+ replaceAllUsesWith(C, Res);
+ };
+
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
+
+ return nullptr;
+}
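+
+// For example (illustration only): a function that computes both sinpi(x)
+// and cospi(x) ends up with a single call
+//   %sc = call { double, double } @__sincospi_stret(double %x)
+// whose two extracted fields replace all of the original sinpi/cospi calls
+// (on x86_64 the float variant returns <2 x float> instead, as handled in
+// insertSinCosCall above).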
+
+void LibCallSimplifier::classifyArgUse(
+ Value *Val, Function *F, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ CallInst *CI = dyn_cast<CallInst>(Val);
+
+ if (!CI)
+ return;
+
+ // Don't consider calls in other functions.
+ if (CI->getFunction() != F)
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ LibFunc Func;
+ if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc_sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc_cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc_sincospif_stret)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc_sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc_cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc_sincospi_stret)
+ SinCosCalls.push_back(CI);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+}
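+
+// Worked example (illustration only): ffs(12) = 3 since 12 = 0b1100;
+// cttz(12) = 2, plus one gives the 1-based index 3, and the select handles
+// x == 0, where ffs must return 0 but cttz was emitted with the
+// is-zero-undef flag set.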
+
+Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
+ // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::ctlz, ArgType);
+ Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
+ V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
+ V);
+ return B.CreateIntCast(V, CI->getType(), false);
+}
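+
+// Worked example (illustration only): for a 32-bit argument,
+// fls(12) = 32 - ctlz(12) = 32 - 28 = 4, the 1-based index of the highest
+// set bit; ctlz is emitted with is-zero-undef false, so fls(0) correctly
+// yields 32 - 32 = 0.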
+
+Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos =
+ B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+}
+
+Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(), 0x7F));
+}
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
+
+Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
+ int StreamArg) {
+ Function *Callee = CI->getCalledFunction();
+ // Error reporting calls should be cold, mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
+
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+ if (!CI->hasFnAttr(Attribute::Cold) &&
+ isReportingError(Callee, CI, StreamArg)) {
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
+ }
+
+ return nullptr;
+}
+
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
+ if (!Callee || !Callee->isDeclaration())
+ return false;
+
+ if (StreamArg < 0)
+ return true;
+
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
+
+ if (StreamArg >= (int)CI->getNumArgOperands())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+}
+
+Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return nullptr;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
+
+ // Do not do any of the following transformations if the printf return value
+ // is used, in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return nullptr;
+
+ // printf("x") -> putchar('x'), even for "%" and "%%".
+ if (FormatStr.size() == 1 || FormatStr == "%%")
+ return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
+
+ // printf("%s", "a") --> putchar('a')
+ if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
+ StringRef ChrStr;
+ if (!getConstantStringInfo(CI->getOperand(1), ChrStr))
+ return nullptr;
+ if (ChrStr.size() != 1)
+ return nullptr;
+ return emitPutChar(B.getInt32(ChrStr[0]), B, TLI);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size() - 1] == '\n' &&
+ FormatStr.find('%') == StringRef::npos) { // No format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ return emitPutS(GV, B, TLI);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy())
+ return emitPutChar(CI->getArgOperand(1), B, TLI);
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy())
+ return emitPutS(CI->getArgOperand(1), B, TLI);
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
+
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizePrintFString(CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return nullptr;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return nullptr; // we found a format specifier, bail out.
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1),
+ 1); // Copy the null byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
+ return nullptr;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = castToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
+ return nullptr;
+
+ Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
+ if (!Len)
+ return nullptr;
+ Value *IncLen =
+ B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizeSPrintFString(CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 0);
+
+ // All the optimizations depend on the format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return nullptr;
+
+ // Do not do any of the following transformations if the fprintf return
+ // value is used, in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return nullptr;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return nullptr; // We found a format specifier.
+
+ return emitFWrite(
+ CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
+ CI->getArgOperand(0), B, DL, TLI);
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
+ return nullptr;
+ return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
+ return nullptr;
+ return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (Value *V = optimizeFPrintFString(CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 3);
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC)
+ return nullptr;
+ uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+  // This optimisation is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+ }
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 1);
+
+ // Don't rewrite fputs to fwrite when optimising for size because fwrite
+ // requires more arguments and thus extra MOVs are required.
+ if (CI->getParent()->getParent()->optForSize())
+ return nullptr;
+
+ // We can't optimize if return value is used.
+ if (!CI->use_empty())
+ return nullptr;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len)
+ return nullptr;
+
+ // Known to have no uses (see above).
+ return emitFWrite(
+ CI->getArgOperand(0),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
+ CI->getArgOperand(1), B, DL, TLI);
+}
+
+Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+ // Check for a constant string.
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = emitPutChar(B.getInt32('\n'), B, TLI);
+ if (CI->use_empty() || !Res)
+ return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return nullptr;
+}
+
+bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
+ LibFunc Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
+}
+
+Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
+ IRBuilder<> &Builder) {
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ // Check for string/memory library functions.
+ if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+ // Make sure we never change the calling convention.
+ assert((ignoreCallingConv(Func) ||
+ isCallingConvCCompatible(CI)) &&
+ "Optimizing string/memory libcall would change the calling convention");
+ switch (Func) {
+ case LibFunc_strcat:
+ return optimizeStrCat(CI, Builder);
+ case LibFunc_strncat:
+ return optimizeStrNCat(CI, Builder);
+ case LibFunc_strchr:
+ return optimizeStrChr(CI, Builder);
+ case LibFunc_strrchr:
+ return optimizeStrRChr(CI, Builder);
+ case LibFunc_strcmp:
+ return optimizeStrCmp(CI, Builder);
+ case LibFunc_strncmp:
+ return optimizeStrNCmp(CI, Builder);
+ case LibFunc_strcpy:
+ return optimizeStrCpy(CI, Builder);
+ case LibFunc_stpcpy:
+ return optimizeStpCpy(CI, Builder);
+ case LibFunc_strncpy:
+ return optimizeStrNCpy(CI, Builder);
+ case LibFunc_strlen:
+ return optimizeStrLen(CI, Builder);
+ case LibFunc_strpbrk:
+ return optimizeStrPBrk(CI, Builder);
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
+ return optimizeStrTo(CI, Builder);
+ case LibFunc_strspn:
+ return optimizeStrSpn(CI, Builder);
+ case LibFunc_strcspn:
+ return optimizeStrCSpn(CI, Builder);
+ case LibFunc_strstr:
+ return optimizeStrStr(CI, Builder);
+ case LibFunc_memchr:
+ return optimizeMemChr(CI, Builder);
+ case LibFunc_memcmp:
+ return optimizeMemCmp(CI, Builder);
+ case LibFunc_memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case LibFunc_memmove:
+ return optimizeMemMove(CI, Builder);
+ case LibFunc_memset:
+ return optimizeMemSet(CI, Builder);
+ case LibFunc_wcslen:
+ return optimizeWcslen(CI, Builder);
+ default:
+ break;
+ }
+ }
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->isNoBuiltin())
+ return nullptr;
+
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
+ bool isCallingConvC = isCallingConvCCompatible(CI);
+
+ // Command-line parameter overrides instruction attribute.
+ if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
+ UnsafeFPShrink = EnableUnsafeFPShrink;
+ else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra())
+ UnsafeFPShrink = true;
+
+ // First, check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (!isCallingConvC)
+ return nullptr;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return optimizePow(CI, Builder);
+ case Intrinsic::exp2:
+ return optimizeExp2(CI, Builder);
+ case Intrinsic::log:
+ return optimizeLog(CI, Builder);
+ case Intrinsic::sqrt:
+ return optimizeSqrt(CI, Builder);
+ // TODO: Use foldMallocMemset() with memset intrinsic.
+ default:
+ return nullptr;
+ }
+ }
+
+ // Also try to simplify calls to fortified library functions.
+ if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
+ // Try to further simplify the result.
+ CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
+ if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
+ // Use an IR Builder from SimplifiedCI if available instead of CI
+ // to guarantee we reach all uses we might replace later on.
+ IRBuilder<> TmpBuilder(SimplifiedCI);
+ if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
+ // If we were able to further simplify, remove the now redundant call.
+ SimplifiedCI->replaceAllUsesWith(V);
+ SimplifiedCI->eraseFromParent();
+ return V;
+ }
+ }
+ return SimplifiedFortifiedCI;
+ }
+
+ // Then check for known library functions.
+ if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !isCallingConvC)
+ return nullptr;
+ if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
+ return V;
+ switch (Func) {
+ case LibFunc_cosf:
+ case LibFunc_cos:
+ case LibFunc_cosl:
+ return optimizeCos(CI, Builder);
+ case LibFunc_sinpif:
+ case LibFunc_sinpi:
+ case LibFunc_cospif:
+ case LibFunc_cospi:
+ return optimizeSinCosPi(CI, Builder);
+ case LibFunc_powf:
+ case LibFunc_pow:
+ case LibFunc_powl:
+ return optimizePow(CI, Builder);
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ return optimizeExp2(CI, Builder);
+ case LibFunc_fabsf:
+ case LibFunc_fabs:
+ case LibFunc_fabsl:
+ return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtl:
+ return optimizeSqrt(CI, Builder);
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ return optimizeFFS(CI, Builder);
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
+ return optimizeFls(CI, Builder);
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
+ return optimizeAbs(CI, Builder);
+ case LibFunc_isdigit:
+ return optimizeIsDigit(CI, Builder);
+ case LibFunc_isascii:
+ return optimizeIsAscii(CI, Builder);
+ case LibFunc_toascii:
+ return optimizeToAscii(CI, Builder);
+ case LibFunc_printf:
+ return optimizePrintF(CI, Builder);
+ case LibFunc_sprintf:
+ return optimizeSPrintF(CI, Builder);
+ case LibFunc_fprintf:
+ return optimizeFPrintF(CI, Builder);
+ case LibFunc_fwrite:
+ return optimizeFWrite(CI, Builder);
+ case LibFunc_fputs:
+ return optimizeFPuts(CI, Builder);
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log1p:
+ case LibFunc_log2:
+ case LibFunc_logb:
+ return optimizeLog(CI, Builder);
+ case LibFunc_puts:
+ return optimizePuts(CI, Builder);
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ return optimizeTan(CI, Builder);
+ case LibFunc_perror:
+ return optimizeErrorReporting(CI, Builder);
+ case LibFunc_vfprintf:
+ case LibFunc_fiprintf:
+ return optimizeErrorReporting(CI, Builder, 0);
+ case LibFunc_fputc:
+ return optimizeErrorReporting(CI, Builder, 1);
+ case LibFunc_ceil:
+ return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
+ case LibFunc_floor:
+ return replaceUnaryCall(CI, Builder, Intrinsic::floor);
+ case LibFunc_round:
+ return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_nearbyint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
+ case LibFunc_rint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::rint);
+ case LibFunc_trunc:
+ return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
+ case LibFunc_acos:
+ case LibFunc_acosh:
+ case LibFunc_asin:
+ case LibFunc_asinh:
+ case LibFunc_atan:
+ case LibFunc_atanh:
+ case LibFunc_cbrt:
+ case LibFunc_cosh:
+ case LibFunc_exp:
+ case LibFunc_exp10:
+ case LibFunc_expm1:
+ case LibFunc_sin:
+ case LibFunc_sinh:
+ case LibFunc_tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return optimizeUnaryDoubleFP(CI, Builder, true);
+ return nullptr;
+ case LibFunc_copysign:
+ if (hasFloatVersion(FuncName))
+ return optimizeBinaryDoubleFP(CI, Builder);
+ return nullptr;
+ case LibFunc_fminf:
+ case LibFunc_fmin:
+ case LibFunc_fminl:
+ case LibFunc_fmaxf:
+ case LibFunc_fmax:
+ case LibFunc_fmaxl:
+ return optimizeFMinFMax(CI, Builder);
+ default:
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
+LibCallSimplifier::LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<void(Instruction *, Value *)> Replacer)
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
+ Replacer(Replacer) {}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
+ // Indirect through the replacer used in this instance.
+ Replacer(I, With);
+}
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(X)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(exp(y)) -> y*log(e)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+//
+// pow, powf, powl:
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+ unsigned ObjSizeOp,
+ unsigned SizeOp,
+ bool isString) {
+ if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+ return true;
+ if (ConstantInt *ObjSizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
+ if (ObjSizeCI->isMinusOne())
+ return true;
+ // If the object size wasn't -1 (unknown), bail out if we were asked to.
+ if (OnlyLowerUnknownSize)
+ return false;
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0)
+ return false;
+ return ObjSizeCI->getZExtValue() >= Len;
+ }
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
+ return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+ }
+ return false;
+}
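+
+// For instance (illustration only): __memcpy_chk(dst, src, n, -1) is
+// foldable because an object size of -1 means "unknown" and the runtime
+// check can never fire; __memcpy_chk(dst, src, 16, 32) is foldable because
+// the copy provably fits; and in the string case __strcpy_chk(dst, "abc", 8)
+// is foldable since GetStringLength counts the nul, giving 4 <= 8.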
+
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+ IRBuilder<> &B) {
+ // TODO: Try foldMallocMemset() here.
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
+ IRBuilder<> &B,
+ LibFunc Func) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
+ *ObjSize = CI->getArgOperand(2);
+
+ // __stpcpy_chk(x,x,...) -> x+strlen(x)
+ if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+ Value *StrLen = emitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
+ }
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+ // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+  // TODO: It might be nice to get a maximum length out of the possible
+  // string lengths for varying strings.
+ if (isFortifiedCallFoldable(CI, 2, 1, true))
+ return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+
+ if (OnlyLowerUnknownSize)
+ return nullptr;
+
+  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc_stpcpy_chk)
+ return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return Ret;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
+ IRBuilder<> &B,
+ LibFunc Func) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
+ return Ret;
+ }
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
+ // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+ // Some clang users checked for _chk libcall availability using:
+ // __has_builtin(__builtin___memcpy_chk)
+ // When compiling with -fno-builtin, this is always true.
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+ // end up with fortified libcalls, which isn't acceptable in a freestanding
+ // environment which only provides their non-fortified counterparts.
+ //
+ // Until we change clang and/or teach external users to check for availability
+ // differently, disregard the "nobuiltin" attribute and TLI::has.
+ //
+ // PR23093.
+
+ LibFunc Func;
+ Function *Callee = CI->getCalledFunction();
+
+ SmallVector<OperandBundleDef, 2> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
+ bool isCallingConvC = isCallingConvCCompatible(CI);
+
+  // First, check that this is a known library function and that the prototype
+ // is correct.
+ if (!TLI->getLibFunc(*Callee, Func))
+ return nullptr;
+
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !isCallingConvC)
+ return nullptr;
+
+ switch (Func) {
+ case LibFunc_memcpy_chk:
+ return optimizeMemCpyChk(CI, Builder);
+ case LibFunc_memmove_chk:
+ return optimizeMemMoveChk(CI, Builder);
+ case LibFunc_memset_chk:
+ return optimizeMemSetChk(CI, Builder);
+ case LibFunc_stpcpy_chk:
+ case LibFunc_strcpy_chk:
+ return optimizeStrpCpyChk(CI, Builder, Func);
+ case LibFunc_stpncpy_chk:
+ case LibFunc_strncpy_chk:
+ return optimizeStrpNCpyChk(CI, Builder, Func);
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
new file mode 100644
index 000000000000..e9a368f4faa4
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -0,0 +1,263 @@
+//===- SplitModule.cpp - Split a module into partitions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
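+// A minimal usage sketch (illustrative only; assumes a Module M is in hand):
+//
+//   SplitModule(std::move(M), /*N=*/4,
+//               [&](std::unique_ptr<Module> MPart) {
+//                 // e.g. hand each partition to a codegen thread.
+//               },
+//               /*PreserveLocals=*/false);
+//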
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "split-module"
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <queue>
+
+using namespace llvm;
+
+namespace {
+typedef EquivalenceClasses<const GlobalValue *> ClusterMapType;
+typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType;
+typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType;
+}
+
+static void addNonConstUser(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const User *U) {
+ assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
+
+ if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ const GlobalValue *F = I->getParent()->getParent();
+ GVtoClusterMap.unionSets(GV, F);
+ } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
+ isa<GlobalVariable>(U)) {
+ GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
+ } else {
+ llvm_unreachable("Underimplemented use case");
+ }
+}
+
+// Adds all GlobalValue users of V to the same cluster as GV.
+static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
+ const GlobalValue *GV, const Value *V) {
+ for (auto *U : V->users()) {
+ SmallVector<const User *, 4> Worklist;
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ const User *UU = Worklist.pop_back_val();
+ // For each constant that is not a GV (a pure const) recurse.
+ if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
+ Worklist.append(UU->user_begin(), UU->user_end());
+ continue;
+ }
+ addNonConstUser(GVtoClusterMap, GV, UU);
+ }
+ }
+}
+
+// Find partitions for the module such that no locals need to be globalized.
+// Try to pack those partitions into N files in a balanced way, since this
+// roughly corresponds to thread balancing for the backend codegen step.
+static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
+ unsigned N) {
+ // At this point module should have the proper mix of globals and locals.
+ // As we attempt to partition this module, we must not change any
+ // locals to globals.
+ DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
+ ClusterMapType GVtoClusterMap;
+ ComdatMembersType ComdatMembers;
+
+ auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
+ if (GV.isDeclaration())
+ return;
+
+ if (!GV.hasName())
+ GV.setName("__llvmsplit_unnamed");
+
+ // Comdat groups must not be partitioned. For comdat groups that contain
+ // locals, record all their members here so we can keep them together.
+ // Comdat groups that only contain external globals are already handled by
+ // the MD5-based partitioning.
+ if (const Comdat *C = GV.getComdat()) {
+ auto &Member = ComdatMembers[C];
+ if (Member)
+ GVtoClusterMap.unionSets(Member, &GV);
+ else
+ Member = &GV;
+ }
+
+ // For aliases we should not separate them from their aliasees regardless
+ // of linkage.
+ if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
+ if (const GlobalObject *Base = GIS->getBaseObject())
+ GVtoClusterMap.unionSets(&GV, Base);
+ }
+
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ for (const BasicBlock &BB : *F) {
+ BlockAddress *BA = BlockAddress::lookup(&BB);
+ if (!BA || !BA->isConstantUsed())
+ continue;
+ addAllGlobalValueUsers(GVtoClusterMap, F, BA);
+ }
+ }
+
+ if (GV.hasLocalLinkage())
+ addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
+ };
+
+ std::for_each(M->begin(), M->end(), recordGVSet);
+ std::for_each(M->global_begin(), M->global_end(), recordGVSet);
+ std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
+
+ // Assign all GVs to merged clusters while balancing the number of objects
+ // in each.
+ auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
+ const std::pair<unsigned, unsigned> &b) {
+ if (a.second || b.second)
+ return a.second > b.second;
+ else
+ return a.first > b.first;
+ };
+
+ std::priority_queue<std::pair<unsigned, unsigned>,
+ std::vector<std::pair<unsigned, unsigned>>,
+ decltype(CompareClusters)>
+ BalancingQueue(CompareClusters);
+ // Pre-populate the priority queue with N empty slots.
+ for (unsigned i = 0; i < N; ++i)
+ BalancingQueue.push(std::make_pair(i, 0));
+
+ typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+ SmallVector<SortType, 64> Sets;
+ SmallPtrSet<const GlobalValue *, 32> Visited;
+
+ // To guarantee determinism, we have to sort the clusters by size.
+ // When sizes are equal, use the leader's name.
+ for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
+ E = GVtoClusterMap.end(); I != E; ++I)
+ if (I->isLeader())
+ Sets.push_back(
+ std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
+ GVtoClusterMap.member_end()), I));
+
+ std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
+
+ for (auto &I : Sets) {
+ unsigned CurrentClusterID = BalancingQueue.top().first;
+ unsigned CurrentClusterSize = BalancingQueue.top().second;
+ BalancingQueue.pop();
+
+ DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
+ << ") ----> " << I.second->getData()->getName() << "\n");
+
+ for (ClusterMapType::member_iterator MI =
+ GVtoClusterMap.findLeader(I.second);
+ MI != GVtoClusterMap.member_end(); ++MI) {
+ if (!Visited.insert(*MI).second)
+ continue;
+ DEBUG(dbgs() << "----> " << (*MI)->getName()
+ << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+ ClusterIDMap[*MI] = CurrentClusterID;
+ CurrentClusterSize++;
+ }
+ // Add this set size to the number of entries in this cluster.
+ BalancingQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
+ }
+}
+
+static void externalize(GlobalValue *GV) {
+ if (GV->hasLocalLinkage()) {
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
+ // Unnamed entities must be named consistently between modules. setName will
+ // give a distinct name to each such entity.
+ if (!GV->hasName())
+ GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+ if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
+ if (const GlobalObject *Base = GIS->getBaseObject())
+ GV = Base;
+
+ StringRef Name;
+ if (const Comdat *C = GV->getComdat())
+ Name = C->getName();
+ else
+ Name = GV->getName();
+
+ // Partition by MD5 hash. We only need a few bits for evenness as the number
+ // of partitions will generally be in the one- or two-digit range; the low
+ // 16 bits are enough.
+ MD5 H;
+ MD5::MD5Result R;
+ H.update(Name);
+ H.final(R);
+ return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+ std::unique_ptr<Module> M, unsigned N,
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
+ bool PreserveLocals) {
+ if (!PreserveLocals) {
+ for (Function &F : *M)
+ externalize(&F);
+ for (GlobalVariable &GV : M->globals())
+ externalize(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ externalize(&GA);
+ for (GlobalIFunc &GIF : M->ifuncs())
+ externalize(&GIF);
+ }
+
+ // This performs splitting without a need for externalization, which might not
+ // always be possible.
+ ClusterIDMapType ClusterIDMap;
+ findPartitions(M.get(), ClusterIDMap, N);
+
+ // FIXME: We should be able to reuse M as the last partition instead of
+ // cloning it.
+ for (unsigned I = 0; I < N; ++I) {
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MPart(
+ CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
+ if (ClusterIDMap.count(GV))
+ return (ClusterIDMap[GV] == I);
+ else
+ return isInPartition(GV, I, N);
+ }));
+ if (I != 0)
+ MPart->setModuleInlineAsm("");
+ ModuleCallback(std::move(MPart));
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
new file mode 100644
index 000000000000..49dc15cf5e7c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -0,0 +1,80 @@
+//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatepointsForGC===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that removes the gc.relocates inserted by
+// RewriteStatepointsForGC. Note that the resulting IR is incorrect, but the
+// pass is useful on its own for analyzing IR without the gc.relocates. The
+// statepoint and gc.result intrinsics would still be present.
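+//
+// The pass is registered as "strip-gc-relocates", so it can be run on its
+// own via opt, e.g. (illustrative): opt -strip-gc-relocates -S in.ll -o out.ll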
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+namespace {
+struct StripGCRelocates : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StripGCRelocates() : FunctionPass(ID) {
+ initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+ bool runOnFunction(Function &F) override;
+
+};
+char StripGCRelocates::ID = 0;
+}
+
+bool StripGCRelocates::runOnFunction(Function &F) {
+ // Nothing to do for declarations.
+ if (F.isDeclaration())
+ return false;
+ SmallVector<GCRelocateInst *, 20> GCRelocates;
+ // TODO: We currently do not handle gc.relocates that are in landing pads,
+ // i.e. not bound to a single statepoint token.
+ for (Instruction &I : instructions(F)) {
+ if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
+ if (isStatepoint(GCR->getOperand(0)))
+ GCRelocates.push_back(GCR);
+ }
+ // All gc.relocates are bound to a single statepoint token. The order of
+ // visiting gc.relocates for deletion does not matter.
+ for (GCRelocateInst *GCRel : GCRelocates) {
+ Value *OrigPtr = GCRel->getDerivedPtr();
+ Value *ReplaceGCRel = OrigPtr;
+
+ // All gc.relocates are i8 addrspace(1)* typed; we need a bitcast from i8
+ // addrspace(1)* to the type of OrigPtr, if they are not the same.
+ if (GCRel->getType() != OrigPtr->getType())
+ ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel);
+
+ // Replace all uses of the gc.relocate and delete it. There may be
+ // unnecessary bitcasts back to the OrigPtr type; an instcombine pass
+ // would clean these up.
+ GCRel->replaceAllUsesWith(ReplaceGCRel);
+ GCRel->eraseFromParent();
+ }
+ return !GCRelocates.empty();
+}
+
+INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
+ "Strip gc.relocates inserted through RewriteStatepointsForGC",
+ true, false)
+FunctionPass *llvm::createStripGCRelocatesPass() {
+ return new StripGCRelocates();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
new file mode 100644
index 000000000000..cd0378e0140c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -0,0 +1,42 @@
+//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+
+/// This pass strips all debug info that is not related to line tables.
+/// The result will be the same as if the program were compiled with
+/// -gline-tables-only.
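+///
+/// Registered as "strip-nonlinetable-debuginfo"; an illustrative invocation:
+///   opt -strip-nonlinetable-debuginfo -S in.ll -o out.ll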
+struct StripNonLineTableDebugInfo : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ StripNonLineTableDebugInfo() : ModulePass(ID) {
+ initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ return llvm::stripNonLineTableDebugInfo(M);
+ }
+};
+}
+
+char StripNonLineTableDebugInfo::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo",
+ "Strip all debug info except linetables", false, false)
+
+ModulePass *llvm::createStripNonLineTableDebugInfoPass() {
+ return new StripNonLineTableDebugInfo();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
new file mode 100644
index 000000000000..20107553665f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -0,0 +1,565 @@
+//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
+// existing code. It is implemented as a compiler pass and is configured via a
+// YAML configuration file.
+//
+// The YAML configuration file format is as follows:
+//
+// RewriteMapFile := RewriteDescriptors
+// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
+// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
+// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
+// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
+// RewriteDescriptorType := Identifier
+// FieldIdentifier := Identifier
+// FieldValue := Identifier
+// Identifier := [0-9a-zA-Z]+
+//
+// Currently, the following descriptor types are supported:
+//
+// - function: (function rewriting)
+// + Source (original name of the function)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// + Naked (boolean, whether the function is undecorated)
+// - global variable: (external linkage global variable rewriting)
+// + Source (original name of externally visible variable)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// - global alias: (global alias rewriting)
+// + Source (original name of the aliased name)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+//
+// Note that Source and exactly one of [Target, Transform] must be provided.
+//
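+// For example, a rewrite map renaming one function and rewriting a family of
+// global variables might look like (hypothetical symbol names):
+//
+//   function:
+//     source: foo
+//     target: bar
+//   global variable:
+//     source: ^llvm_
+//     transform: __llvm_
+//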
+// New rewrite descriptors can be created. Adding a new rewrite descriptor
+// involves:
+//
+// a) extending the rewrite descriptor kind enumeration
+// (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
+// b) implementing the new descriptor
+// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
+// c) extending the rewrite map parser
+// (<anonymous>::RewriteMapParser::parseEntry)
+//
+// Enable symbol rewriting with the `-rewrite-symbols` option, and specify the
+// map file to use for the rewriting via the `-rewrite-map-file` option.
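+//
+// For example (illustrative):
+//   opt -rewrite-symbols -rewrite-map-file=rewrite.map -S in.ll -o out.ll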
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "symbol-rewriter"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace SymbolRewriter;
+
+static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
+ cl::desc("Symbol Rewrite Map"),
+ cl::value_desc("filename"));
+
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
+ if (Comdat *CD = GO->getComdat()) {
+ auto &Comdats = M.getComdatSymbolTable();
+
+ Comdat *C = M.getOrInsertComdat(Target);
+ C->setSelectionKind(CD->getSelectionKind());
+ GO->setComdat(C);
+
+ Comdats.erase(Comdats.find(Source));
+ }
+}
+
+namespace {
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const>
+class ExplicitRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Source;
+ const std::string Target;
+
+ ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
+ : RewriteDescriptor(DT), Source(Naked ? StringRef("\01" + S.str()) : S),
+ Target(T) {}
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const>
+bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
+ bool Changed = false;
+ if (ValueType *S = (M.*Get)(Source)) {
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
+ rewriteComdat(M, GO, Source, Target);
+
+ if (Value *T = (M.*Get)(Target))
+ S->setValueName(T->getValueName());
+ else
+ S->setName(Target);
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator>
+ (llvm::Module::*Iterator)()>
+class PatternRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Pattern;
+ const std::string Transform;
+
+ PatternRewriteDescriptor(StringRef P, StringRef T)
+ : RewriteDescriptor(DT), Pattern(P), Transform(T) { }
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator>
+ (llvm::Module::*Iterator)()>
+bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
+performOnModule(Module &M) {
+ bool Changed = false;
+ for (auto &C : (M.*Iterator)()) {
+ std::string Error;
+
+ std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
+ if (!Error.empty())
+ report_fatal_error("unable to transforn " + C.getName() + " in " +
+ M.getModuleIdentifier() + ": " + Error);
+
+ if (C.getName() == Name)
+ continue;
+
+ if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
+ rewriteComdat(M, GO, C.getName(), Name);
+
+ if (Value *V = (M.*Get)(Name))
+ C.setValueName(V->getValueName());
+ else
+ C.setName(Name);
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// Represents a rewrite for an explicitly named (function) symbol. Both the
+/// source function name and target function name of the transformation are
+/// explicitly spelt out.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function,
+ llvm::Function, &llvm::Module::getFunction>
+ ExplicitRewriteFunctionDescriptor;
+
+/// Represents a rewrite for an explicitly named (global variable) symbol. Both
+/// the source variable name and target variable name are spelt out. This
+/// applies only to module level variables.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ llvm::GlobalVariable,
+ &llvm::Module::getGlobalVariable>
+ ExplicitRewriteGlobalVariableDescriptor;
+
+/// Represents a rewrite for an explicitly named global alias. Both the source
+/// and target name are explicitly spelt out.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
+ llvm::GlobalAlias,
+ &llvm::Module::getNamedAlias>
+ ExplicitRewriteNamedAliasDescriptor;
+
+/// Represents a rewrite for functions based on a regular expression pattern.
+/// A pattern for the function name, together with a transformation applied to
+/// matching names, determines the target function name for the rewrite rule.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function,
+ llvm::Function, &llvm::Module::getFunction,
+ &llvm::Module::functions>
+ PatternRewriteFunctionDescriptor;
+
+/// Represents a rewrite for a global variable based upon a matching pattern.
+/// Each global variable matching the provided pattern will be transformed as
+/// described in the transformation pattern for the target. Applies only to
+/// module level variables.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ llvm::GlobalVariable,
+ &llvm::Module::getGlobalVariable,
+ &llvm::Module::globals>
+ PatternRewriteGlobalVariableDescriptor;
+
+/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
+/// aliases which match a given pattern. The provided transformation will be
+/// applied to each of the matching names.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
+ llvm::GlobalAlias,
+ &llvm::Module::getNamedAlias,
+ &llvm::Module::aliases>
+ PatternRewriteNamedAliasDescriptor;
+} // namespace
+
+bool RewriteMapParser::parse(const std::string &MapFile,
+ RewriteDescriptorList *DL) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
+ MemoryBuffer::getFile(MapFile);
+
+ if (!Mapping)
+ report_fatal_error("unable to read rewrite map '" + MapFile + "': " +
+ Mapping.getError().message());
+
+ if (!parse(*Mapping, DL))
+ report_fatal_error("unable to parse rewrite map '" + MapFile + "'");
+
+ return true;
+}
+
+bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
+ RewriteDescriptorList *DL) {
+ SourceMgr SM;
+ yaml::Stream YS(MapFile->getBuffer(), SM);
+
+ for (auto &Document : YS) {
+ yaml::MappingNode *DescriptorList;
+
+ // ignore empty documents
+ if (isa<yaml::NullNode>(Document.getRoot()))
+ continue;
+
+ DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
+ if (!DescriptorList) {
+ YS.printError(Document.getRoot(), "DescriptorList node must be a map");
+ return false;
+ }
+
+ for (auto &Descriptor : *DescriptorList)
+ if (!parseEntry(YS, Descriptor, DL))
+ return false;
+ }
+
+ return true;
+}
+
+bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
+ RewriteDescriptorList *DL) {
+ yaml::ScalarNode *Key;
+ yaml::MappingNode *Value;
+ SmallString<32> KeyStorage;
+ StringRef RewriteType;
+
+ Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
+ if (!Key) {
+ YS.printError(Entry.getKey(), "rewrite type must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
+ if (!Value) {
+ YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
+ return false;
+ }
+
+ RewriteType = Key->getValue(KeyStorage);
+ if (RewriteType.equals("function"))
+ return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global variable"))
+ return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global alias"))
+ return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
+
+ YS.printError(Entry.getKey(), "unknown rewrite type");
+ return false;
+}
+
+bool RewriteMapParser::
+parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ bool Naked = false;
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("naked")) {
+ std::string Undecorated;
+
+ Undecorated = Value->getValue(ValueStorage);
+ Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
+ } else {
+ YS.printError(Field.getKey(), "unknown key for function");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ // TODO see if there is a more elegant solution to selecting the rewrite
+ // descriptor type
+ if (!Target.empty())
+ DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
+ Source, Target, Naked));
+ else
+ DL->push_back(
+ llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor Key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else {
+ YS.printError(Field.getKey(), "unknown Key for Global Variable");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
+ else
+ DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ Source, Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else {
+ YS.printError(Field.getKey(), "unknown key for Global Alias");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
+ else
+ DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
+ Source, Transform));
+
+ return true;
+}
+
+namespace {
+class RewriteSymbolsLegacyPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ RewriteSymbolsLegacyPass();
+ RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL);
+
+ bool runOnModule(Module &M) override;
+
+private:
+ RewriteSymbolPass Impl;
+};
+
+char RewriteSymbolsLegacyPass::ID = 0;
+
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() {
+ initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
+}
+
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
+ SymbolRewriter::RewriteDescriptorList &DL)
+ : ModulePass(ID), Impl(DL) {}
+
+bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
+ return Impl.runImpl(M);
+}
+}
+
+namespace llvm {
+PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!runImpl(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+bool RewriteSymbolPass::runImpl(Module &M) {
+ bool Changed;
+
+ Changed = false;
+ for (auto &Descriptor : Descriptors)
+ Changed |= Descriptor->performOnModule(M);
+
+ return Changed;
+}
+
+void RewriteSymbolPass::loadAndParseMapFiles() {
+ const std::vector<std::string> MapFiles(RewriteMapFiles);
+ SymbolRewriter::RewriteMapParser Parser;
+
+ for (const auto &MapFile : MapFiles)
+ Parser.parse(MapFile, &Descriptors);
+}
+}
+
+INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
+ false, false)
+
+ModulePass *llvm::createRewriteSymbolsPass() {
+ return new RewriteSymbolsLegacyPass();
+}
+
+ModulePass *
+llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
+ return new RewriteSymbolsLegacyPass(DL);
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 000000000000..9385f825523c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,116 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them. Additionally, it keeps track of which node is the new
+// exit node of the CFG. If there is no such node, the getReturnBlock and
+// getUnreachableBlock accessors return a null pointer.
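+//
+// The pass is registered as "mergereturn"; an illustrative invocation:
+//   opt -mergereturn -S in.ll -o out.ll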
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+char UnifyFunctionExitNodes::ID = 0;
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false)
+
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+ return new UnifyFunctionExitNodes();
+}
+
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(LowerSwitchID);
+}
+
+// Unify all exit nodes of the CFG by creating a new BasicBlock and converting
+// all returns to unconditional branches to this new basic block. The unified
+// exit node is recorded in ReturnBlock.
+//
+// If there are no return statements in the Function, ReturnBlock is set to
+// null.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ std::vector<BasicBlock*> ReturningBlocks;
+ std::vector<BasicBlock*> UnreachableBlocks;
+ for (BasicBlock &I : F)
+ if (isa<ReturnInst>(I.getTerminator()))
+ ReturningBlocks.push_back(&I);
+ else if (isa<UnreachableInst>(I.getTerminator()))
+ UnreachableBlocks.push_back(&I);
+
+ // Then unreachable blocks.
+ if (UnreachableBlocks.empty()) {
+ UnreachableBlock = nullptr;
+ } else if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty()) {
+ ReturnBlock = nullptr;
+ return false; // No blocks return
+ } else if (ReturningBlocks.size() == 1) {
+ ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+ return false;
+ }
+
+ // Otherwise, we need to insert a new basic block into the function, add a
+ // PHI node (if the function returns a value), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedReturnBlock", &F);
+
+ PHINode *PN = nullptr;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+ ReturnBlock = NewRetBlock;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..f6c7d1c4989e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,45 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeAddDiscriminatorsLegacyPassPass(Registry);
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAWrapperPassPass(Registry);
+ initializeLibCallsShrinkWrapLegacyPassPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokeLegacyPassPass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializeNameAnonGlobalLegacyPassPass(Registry);
+ initializePromoteLegacyPassPass(Registry);
+ initializeStripNonLineTableDebugInfoPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+ initializeMetaRenamerPass(Registry);
+ initializeStripGCRelocatesPass(Registry);
+ initializePredicateInfoPrinterLegacyPassPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
new file mode 100644
index 000000000000..c3feea6a0a41
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -0,0 +1,495 @@
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vncoerce"
+namespace llvm {
+namespace VNCoercion {
+
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform it. We need to be able to bitcast to an integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+ return false;
+
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
+ DL.isNonIntegralPointerType(LoadTy))
+ return false;
+
+ return true;
+}
+
+template <class T, class HelperClass>
+static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ "precondition violation - materialization can't fail");
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ // If this is already the right type, just return it.
+ Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoredValSize == LoadedValSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ } else {
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->isPtrOrPtrVectorTy())
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->isPtrOrPtrVectorTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+ }
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoredValSize >= LoadedValSize &&
+ "canCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->isPtrOrPtrVectorTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
+ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (DL.isBigEndian()) {
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+ DL.getTypeStoreSizeInBits(LoadedTy);
+ StoredVal = Helper.CreateLShr(
+ StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
+ }
+
+ // Truncate the integer to the right size now.
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
+ StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
+
+ if (LoadedTy != NewIntTy) {
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->isPtrOrPtrVectorTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ else
+ // Otherwise, bitcast.
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+}
+
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// IRB is the IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilder<> &IRB, const DataLayout &DL) {
+ return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering memory write (store, memset, memcpy, memmove). This
+/// means that the write *may* provide bits used by the load but we can't be
+/// sure because the pointers don't must-alias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
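+///
+/// For example (illustrative): an i64 store to P clobbering an i32 load from
+/// P+4 yields 4, i.e. the load reads bytes 4..7 of the stored value.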
+static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+ LoadSize /= 8;
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure)
+ return -1;
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset + StoreSize < LoadOffset + LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset - StoreOffset;
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI, const DataLayout &DL) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =
+ DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
+ DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+ const DataLayout &DL) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+ if (R != -1)
+ return R;
+
+ // If we have a load/load clobber and DepLI can be widened to cover this
+ // load, then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
+ if (Size == 0)
+ return -1;
+
+ // Check non-obvious conditions enforced by MDA which we rely on for being
+ // able to materialize this potentially available value
+ assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+}
+
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI, const DataLayout &DL) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (!SizeCst)
+ return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+
+ // If this is a memset, we just need to see if the offset is valid within
+ // the size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (!Src)
+ return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+ if (!GV || !GV->isConstant())
+ return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+ if (Offset == -1)
+ return Offset;
+
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+ return Offset;
+ return -1;
+}
+
+template <class T, class HelperClass>
+static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ // If two pointers are in the same address space, they have the same size,
+ // so we don't need to do any truncation, etc. This avoids introducing
+ // ptrtoint instructions for pointers that may be non-integral.
+ if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
+ cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return SrcVal;
+ }
+
+ uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->isPtrOrPtrVectorTy())
+ SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+
+ // Shift the bits to the least significant depending on endianness.
+ unsigned ShiftAmt;
+ if (DL.isLittleEndian())
+ ShiftAmt = Offset * 8;
+ else
+ ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+ if (ShiftAmt)
+ SrcVal = Helper.CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt));
+
+ if (LoadSize != StoreSize)
+ SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
+ IntegerType::get(Ctx, LoadSize * 8));
+ return SrcVal;
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering store. This means that the store provides bits used by
+/// the load but the pointers don't must-alias. Check this case to see if
+/// there is anything more we can do before we give up.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+
+ IRBuilder<> Builder(InsertPt);
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
+}
+
+Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ ConstantFolder F;
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering load. This means that the load *may* provide bits used
+/// by the load but we can't be sure because the pointers don't must-alias.
+/// Check this case to see if there is anything more we can do before we give
+/// up.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must widen SrcVal
+ // out to a larger load.
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize) {
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+ // If we have a load/load clobber and DepLI can be widened to cover this
+ // load, then we should widen it to the next power-of-2 size that is big
+ // enough!
+ unsigned NewLoadSize = Offset + LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ DestPTy =
+ PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (DL.isBigEndian())
+ RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ SrcVal = NewLoad;
+ }
+
+ return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
+}
+
+Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize)
+ return nullptr;
+ return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
+}
+
+template <class T, class HelperClass>
+T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ T *Val = cast<T>(MSI->getValue());
+ if (LoadSize != 1)
+ Val =
+ Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
+ T *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet * 2 <= LoadSize) {
+ T *ShVal = Helper.CreateShl(
+ Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
+ Val = Helper.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
+ Val = Helper.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+}
+
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &DL) {
+ IRBuilder<> Builder(InsertPt);
+ return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset,
+ LoadTy, Builder, DL);
+}
+
+Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+  // The only case in which the value analyzed by
+  // analyzeLoadFromClobberingMemInst cannot be converted to a constant is a
+  // memset of a non-constant value.
+ if (auto *MSI = dyn_cast<MemSetInst>(SrcInst))
+ if (!isa<Constant>(MSI->getValue()))
+ return nullptr;
+ ConstantFolder F;
+ return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset,
+ LoadTy, F, DL);
+}
+} // namespace VNCoercion
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 000000000000..930972924c3c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,1109 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+// Out-of-line methods to anchor the vtables for these classes.
+void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
+
+namespace {
+
+/// A basic block used in a BlockAddress whose function body is not yet
+/// materialized.
+struct DelayedBasicBlock {
+ BasicBlock *OldBB;
+ std::unique_ptr<BasicBlock> TempBB;
+
+ DelayedBasicBlock(const BlockAddress &Old)
+ : OldBB(Old.getBasicBlock()),
+ TempBB(BasicBlock::Create(Old.getContext())) {}
+};
+
+struct WorklistEntry {
+ enum EntryKind {
+ MapGlobalInit,
+ MapAppendingVar,
+ MapGlobalAliasee,
+ RemapFunction
+ };
+ struct GVInitTy {
+ GlobalVariable *GV;
+ Constant *Init;
+ };
+ struct AppendingGVTy {
+ GlobalVariable *GV;
+ Constant *InitPrefix;
+ };
+ struct GlobalAliaseeTy {
+ GlobalAlias *GA;
+ Constant *Aliasee;
+ };
+
+ unsigned Kind : 2;
+ unsigned MCID : 29;
+ unsigned AppendingGVIsOldCtorDtor : 1;
+ unsigned AppendingGVNumNewMembers;
+ union {
+ GVInitTy GVInit;
+ AppendingGVTy AppendingGV;
+ GlobalAliaseeTy GlobalAliasee;
+ Function *RemapF;
+ } Data;
+};
+
+struct MappingContext {
+ ValueToValueMapTy *VM;
+ ValueMaterializer *Materializer = nullptr;
+
+ /// Construct a MappingContext with a value map and materializer.
+ explicit MappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr)
+ : VM(&VM), Materializer(Materializer) {}
+};
+
+class MDNodeMapper;
+class Mapper {
+ friend class MDNodeMapper;
+
+#ifndef NDEBUG
+ DenseSet<GlobalValue *> AlreadyScheduled;
+#endif
+
+ RemapFlags Flags;
+ ValueMapTypeRemapper *TypeMapper;
+ unsigned CurrentMCID = 0;
+ SmallVector<MappingContext, 2> MCs;
+ SmallVector<WorklistEntry, 4> Worklist;
+ SmallVector<DelayedBasicBlock, 1> DelayedBBs;
+ SmallVector<Constant *, 16> AppendingInits;
+
+public:
+ Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
+ : Flags(Flags), TypeMapper(TypeMapper),
+ MCs(1, MappingContext(VM, Materializer)) {}
+
+ /// ValueMapper should explicitly call \a flush() before destruction.
+ ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
+
+ bool hasWorkToDo() const { return !Worklist.empty(); }
+
+ unsigned
+ registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer = nullptr) {
+ MCs.push_back(MappingContext(VM, Materializer));
+ return MCs.size() - 1;
+ }
+
+ void addFlags(RemapFlags Flags);
+
+ void remapGlobalObjectMetadata(GlobalObject &GO);
+
+ Value *mapValue(const Value *V);
+ void remapInstruction(Instruction *I);
+ void remapFunction(Function &F);
+
+ Constant *mapConstant(const Constant *C) {
+ return cast_or_null<Constant>(mapValue(C));
+ }
+
+ /// Map metadata.
+ ///
+  /// Find the mapping for MD. Guarantees that the returned metadata is
+  /// resolved (either not an MDNode, or MDNode::isResolved() returns true).
+ Metadata *mapMetadata(const Metadata *MD);
+
+ void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID);
+ void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID);
+ void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID);
+ void scheduleRemapFunction(Function &F, unsigned MCID);
+
+ void flush();
+
+private:
+ void mapGlobalInitializer(GlobalVariable &GV, Constant &Init);
+ void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers);
+ void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
+ void remapFunction(Function &F, ValueToValueMapTy &VM);
+
+ ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
+ ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
+
+ Value *mapBlockAddress(const BlockAddress &BA);
+
+ /// Map metadata that doesn't require visiting operands.
+ Optional<Metadata *> mapSimpleMetadata(const Metadata *MD);
+
+ Metadata *mapToMetadata(const Metadata *Key, Metadata *Val);
+ Metadata *mapToSelf(const Metadata *MD);
+};
+
+class MDNodeMapper {
+ Mapper &M;
+
+ /// Data about a node in \a UniquedGraph.
+ struct Data {
+ bool HasChanged = false;
+ unsigned ID = ~0u;
+ TempMDNode Placeholder;
+ };
+
+ /// A graph of uniqued nodes.
+ struct UniquedGraph {
+ SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties.
+ SmallVector<MDNode *, 16> POT; // Post-order traversal.
+
+ /// Propagate changed operands through the post-order traversal.
+ ///
+ /// Iteratively update \a Data::HasChanged for each node based on \a
+ /// Data::HasChanged of its operands, until fixed point.
+ void propagateChanges();
+
+ /// Get a forward reference to a node to use as an operand.
+ Metadata &getFwdReference(MDNode &Op);
+ };
+
+ /// Worklist of distinct nodes whose operands need to be remapped.
+ SmallVector<MDNode *, 16> DistinctWorklist;
+
+ // Storage for a UniquedGraph.
+ SmallDenseMap<const Metadata *, Data, 32> InfoStorage;
+ SmallVector<MDNode *, 16> POTStorage;
+
+public:
+ MDNodeMapper(Mapper &M) : M(M) {}
+
+ /// Map a metadata node (and its transitive operands).
+ ///
+ /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative
+ /// algorithm handles distinct nodes and uniqued node subgraphs using
+ /// different strategies.
+ ///
+ /// Distinct nodes are immediately mapped and added to \a DistinctWorklist
+ /// using \a mapDistinctNode(). Their mapping can always be computed
+ /// immediately without visiting operands, even if their operands change.
+ ///
+ /// The mapping for uniqued nodes depends on whether their operands change.
+ /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of
+  /// a node to calculate uniqued node mappings in bulk. Distinct leaves are
+ /// added to \a DistinctWorklist with \a mapDistinctNode().
+ ///
+ /// After mapping \c N itself, this function remaps the operands of the
+ /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c
+ /// N has been mapped.
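+  ///
+  /// For example, if \c N is distinct with a uniqued operand whose own
+  /// operands were remapped, \c N itself maps immediately (cloned or moved),
+  /// the uniqued operand is remapped via \a mapTopLevelUniquedNode(), and
+  /// the new operand is patched in afterwards through \a DistinctWorklist.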
+ Metadata *map(const MDNode &N);
+
+private:
+ /// Map a top-level uniqued node and the uniqued subgraph underneath it.
+ ///
+ /// This builds up a post-order traversal of the (unmapped) uniqued subgraph
+ /// underneath \c FirstN and calculates the nodes' mapping. Each node uses
+ /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its
+  /// operands use the identity mapping.
+ ///
+ /// The algorithm works as follows:
+ ///
+ /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and
+ /// save the post-order traversal in the given \a UniquedGraph, tracking
+  ///    whether any node's operands have changed.
+ ///
+ /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands
+ /// through the \a UniquedGraph until fixed point, following the rule
+  ///    that if a node changes, any node that references it must also change.
+ ///
+ /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes
+ /// (referencing new operands) where necessary.
+ Metadata *mapTopLevelUniquedNode(const MDNode &FirstN);
+
+ /// Try to map the operand of an \a MDNode.
+ ///
+ /// If \c Op is already mapped, return the mapping. If it's not an \a
+ /// MDNode, compute and return the mapping. If it's a distinct \a MDNode,
+ /// return the result of \a mapDistinctNode().
+ ///
+ /// \return None if \c Op is an unmapped uniqued \a MDNode.
+ /// \post getMappedOp(Op) only returns None if this returns None.
+ Optional<Metadata *> tryToMapOperand(const Metadata *Op);
+
+ /// Map a distinct node.
+ ///
+ /// Return the mapping for the distinct node \c N, saving the result in \a
+ /// DistinctWorklist for later remapping.
+ ///
+ /// \pre \c N is not yet mapped.
+ /// \pre \c N.isDistinct().
+ MDNode *mapDistinctNode(const MDNode &N);
+
+ /// Get a previously mapped node.
+ Optional<Metadata *> getMappedOp(const Metadata *Op) const;
+
+ /// Create a post-order traversal of an unmapped uniqued node subgraph.
+ ///
+ /// This traverses the metadata graph deeply enough to map \c FirstN. It
+  /// uses \a tryToMapOperand() (which calls \a Mapper::mapSimpleMetadata()),
+  /// so any metadata that has already been mapped will not be part of the
+  /// POT.
+ ///
+ /// Each node that has a changed operand from outside the graph (e.g., a
+ /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata)
+ /// is marked with \a Data::HasChanged.
+ ///
+ /// \return \c true if any nodes in \c G have \a Data::HasChanged.
+ /// \post \c G.POT is a post-order traversal ending with \c FirstN.
+  /// \post \a Data::HasChanged in \c G.Info indicates whether any node needs
+ /// to change because of operands outside the graph.
+ bool createPOT(UniquedGraph &G, const MDNode &FirstN);
+
+ /// Visit the operands of a uniqued node in the POT.
+ ///
+ /// Visit the operands in the range from \c I to \c E, returning the first
+  /// uniqued node we find that isn't yet in \c G. \c I is always left
+  /// pointing at the operand where the traversal should resume.
+ ///
+ /// This sets \c HasChanged if any of the visited operands change.
+ MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged);
+
+ /// Map all the nodes in the given uniqued graph.
+ ///
+ /// This visits all the nodes in \c G in post-order, using the identity
+ /// mapping or creating a new node depending on \a Data::HasChanged.
+ ///
+ /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of
+ /// their operands outside of \c G.
+ /// \pre \a Data::HasChanged is true for a node in \c G iff any of its
+ /// operands have changed.
+ /// \post \a getMappedOp() returns the mapped node for every node in \c G.
+ void mapNodesInPOT(UniquedGraph &G);
+
+ /// Remap a node's operands using the given functor.
+ ///
+ /// Iterate through the operands of \c N and update them in place using \c
+ /// mapOperand.
+ ///
+ /// \pre N.isDistinct() or N.isTemporary().
+ template <class OperandMapper>
+ void remapOperands(MDNode &N, OperandMapper mapOperand);
+};
+
+} // end namespace
+
+Value *Mapper::mapValue(const Value *V) {
+ ValueToValueMapTy::iterator I = getVM().find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != getVM().end()) {
+ assert(I->second && "Unexpected null mapping");
+ return I->second;
+ }
+
+ // If we have a materializer and it can materialize a value, use that.
+ if (auto *Materializer = getMaterializer()) {
+ if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) {
+ getVM()[V] = NewV;
+ return NewV;
+ }
+ }
+
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
+ if (isa<GlobalValue>(V)) {
+ if (Flags & RF_NullMapMissingGlobalValues)
+ return nullptr;
+ return getVM()[V] = const_cast<Value *>(V);
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return getVM()[V] = const_cast<Value *>(V);
+ }
+
+ if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) {
+ const Metadata *MD = MDV->getMetadata();
+
+ if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) {
+ // Look through to grab the local value.
+ if (Value *LV = mapValue(LAM->getValue())) {
+        if (LV == LAM->getValue())
+ return const_cast<Value *>(V);
+ return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV));
+ }
+
+ // FIXME: always return nullptr once Verifier::verifyDominatesUse()
+ // ensures metadata operands only reference defined SSA values.
+ return (Flags & RF_IgnoreMissingLocals)
+ ? nullptr
+ : MetadataAsValue::get(V->getContext(),
+ MDTuple::get(V->getContext(), None));
+ }
+
+    // If this is module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (Flags & RF_NoModuleLevelChanges)
+ return getVM()[V] = const_cast<Value *>(V);
+
+ // Map the metadata and turn it into a value.
+ auto *MappedMD = mapMetadata(MD);
+ if (MD == MappedMD)
+ return getVM()[V] = const_cast<Value *>(V);
+ return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD);
+ }
+
+  // Okay, this must either be a constant (which may or may not be mappable)
+  // or something that is not in the mapping table.
+ Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
+ if (!C)
+ return nullptr;
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return mapBlockAddress(*BA);
+
+ auto mapValueOrNull = [this](Value *V) {
+ auto Mapped = mapValue(V);
+ assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
+ "Unexpected null mapping for constant operand without "
+ "NullMapMissingGlobalValues flag");
+ return Mapped;
+ };
+
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
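+  // For example, a ConstantExpr GEP whose base global was remapped must be
+  // rebuilt with the new base, whereas a constant whose operands and type
+  // all map to themselves can simply map to itself.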
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = nullptr;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = mapValueOrNull(Op);
+ if (!Mapped)
+ return nullptr;
+ if (Mapped != Op)
+ break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return getVM()[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+  // all of the operands; set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+  // If one of the operands changed, push its mapped value and then map the
+  // remaining operands.
+ if (OpNo != NumOperands) {
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++OpNo; OpNo != NumOperands; ++OpNo) {
+ Mapped = mapValueOrNull(C->getOperand(OpNo));
+ if (!Mapped)
+ return nullptr;
+ Ops.push_back(cast<Constant>(Mapped));
+ }
+ }
+ Type *NewSrcTy = nullptr;
+ if (TypeMapper)
+ if (auto *GEPO = dyn_cast<GEPOperator>(C))
+ NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
+ if (isa<ConstantArray>(C))
+ return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return getVM()[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return getVM()[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return getVM()[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
+}
+
+Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
+ Function *F = cast<Function>(mapValue(BA.getFunction()));
+
+  // F's body may not have been materialized yet. In that case, create a
+  // dummy basic block for now, and replace it once the function body has
+  // been materialized (see flush()).
+ BasicBlock *BB;
+ if (F->empty()) {
+ DelayedBBs.push_back(DelayedBasicBlock(BA));
+ BB = DelayedBBs.back().TempBB.get();
+ } else {
+ BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock()));
+ }
+
+ return getVM()[&BA] = BlockAddress::get(F, BB ? BB : BA.getBasicBlock());
+}
+
+Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) {
+ getVM().MD()[Key].reset(Val);
+ return Val;
+}
+
+Metadata *Mapper::mapToSelf(const Metadata *MD) {
+ return mapToMetadata(MD, const_cast<Metadata *>(MD));
+}
+
+Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
+ if (!Op)
+ return nullptr;
+
+ if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) {
+#ifndef NDEBUG
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ assert((!*MappedOp || M.getVM().count(CMD->getValue()) ||
+ M.getVM().getMappedMD(Op)) &&
+ "Expected Value to be memoized");
+ else
+ assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) &&
+ "Expected result to be memoized");
+#endif
+ return *MappedOp;
+ }
+
+ const MDNode &N = *cast<MDNode>(Op);
+ if (N.isDistinct())
+ return mapDistinctNode(N);
+ return None;
+}
+
+MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
+ assert(N.isDistinct() && "Expected a distinct node");
+ assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
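+  // With RF_MoveDistinctMDs the node is "moved": it maps to itself and only
+  // its operands are remapped in place; otherwise it is cloned and the clone
+  // becomes the mapping.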
+ DistinctWorklist.push_back(cast<MDNode>(
+ (M.Flags & RF_MoveDistinctMDs)
+ ? M.mapToSelf(&N)
+ : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone()))));
+ return DistinctWorklist.back();
+}
+
+static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD,
+ Value *MappedV) {
+ if (CMD.getValue() == MappedV)
+ return const_cast<ConstantAsMetadata *>(&CMD);
+ return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr;
+}
+
+Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const {
+ if (!Op)
+ return nullptr;
+
+ if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op))
+ return *MappedOp;
+
+ if (isa<MDString>(Op))
+ return const_cast<Metadata *>(Op);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
+ return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue()));
+
+ return None;
+}
+
+Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) {
+ auto Where = Info.find(&Op);
+ assert(Where != Info.end() && "Expected a valid reference");
+
+ auto &OpD = Where->second;
+ if (!OpD.HasChanged)
+ return Op;
+
+ // Lazily construct a temporary node.
+ if (!OpD.Placeholder)
+ OpD.Placeholder = Op.clone();
+
+ return *OpD.Placeholder;
+}
+
+template <class OperandMapper>
+void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
+ assert(!N.isUniqued() && "Expected distinct or temporary nodes");
+ for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
+ Metadata *Old = N.getOperand(I);
+ Metadata *New = mapOperand(Old);
+
+ if (Old != New)
+ N.replaceOperandWith(I, New);
+ }
+}
+
+namespace {
+/// An entry in the worklist for the post-order traversal.
+struct POTWorklistEntry {
+ MDNode *N; ///< Current node.
+ MDNode::op_iterator Op; ///< Current operand of \c N.
+
+  /// Keep a flag in the worklist entry recording whether any operand has
+  /// changed, to avoid hitting the map in \a UniquedGraph.
+ bool HasChanged = false;
+
+ POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
+};
+} // end namespace
+
+bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
+ assert(G.Info.empty() && "Expected a fresh traversal");
+ assert(FirstN.isUniqued() && "Expected uniqued node in POT");
+
+ // Construct a post-order traversal of the uniqued subgraph under FirstN.
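+  // For example, for FirstN = !0 = !{!1, !2} with !1 = !{!2}, the resulting
+  // POT is [!2, !1, !0]: operands always precede the nodes that use them.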
+ bool AnyChanges = false;
+ SmallVector<POTWorklistEntry, 16> Worklist;
+ Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN)));
+ (void)G.Info[&FirstN];
+ while (!Worklist.empty()) {
+    // Start or continue the traversal through this node's operands.
+ auto &WE = Worklist.back();
+ if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) {
+ // Push a new node to traverse first.
+ Worklist.push_back(POTWorklistEntry(*N));
+ continue;
+ }
+
+ // Push the node onto the POT.
+ assert(WE.N->isUniqued() && "Expected only uniqued nodes");
+ assert(WE.Op == WE.N->op_end() && "Expected to visit all operands");
+ auto &D = G.Info[WE.N];
+ AnyChanges |= D.HasChanged = WE.HasChanged;
+ D.ID = G.POT.size();
+ G.POT.push_back(WE.N);
+
+ // Pop the node off the worklist.
+ Worklist.pop_back();
+ }
+ return AnyChanges;
+}
+
+MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
+ MDNode::op_iterator E, bool &HasChanged) {
+ while (I != E) {
+ Metadata *Op = *I++; // Increment even on early return.
+ if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) {
+ // Check if the operand changes.
+ HasChanged |= Op != *MappedOp;
+ continue;
+ }
+
+ // A uniqued metadata node.
+ MDNode &OpN = *cast<MDNode>(Op);
+ assert(OpN.isUniqued() &&
+ "Only uniqued operands cannot be mapped immediately");
+ if (G.Info.insert(std::make_pair(&OpN, Data())).second)
+ return &OpN; // This is a new one. Return it.
+ }
+ return nullptr;
+}
+
+void MDNodeMapper::UniquedGraph::propagateChanges() {
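+  // Iterate to a fixed point: a single pass over the POT can miss changes
+  // that flow against the traversal order (e.g., through a uniquing cycle),
+  // so keep re-scanning until no node's HasChanged flag flips.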
+ bool AnyChanges;
+ do {
+ AnyChanges = false;
+ for (MDNode *N : POT) {
+ auto &D = Info[N];
+ if (D.HasChanged)
+ continue;
+
+ if (none_of(N->operands(), [&](const Metadata *Op) {
+ auto Where = Info.find(Op);
+ return Where != Info.end() && Where->second.HasChanged;
+ }))
+ continue;
+
+ AnyChanges = D.HasChanged = true;
+ }
+ } while (AnyChanges);
+}
+
+void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
+ // Construct uniqued nodes, building forward references as necessary.
+ SmallVector<MDNode *, 16> CyclicNodes;
+ for (auto *N : G.POT) {
+ auto &D = G.Info[N];
+ if (!D.HasChanged) {
+ // The node hasn't changed.
+ M.mapToSelf(N);
+ continue;
+ }
+
+ // Remember whether this node had a placeholder.
+ bool HadPlaceholder(D.Placeholder);
+
+ // Clone the uniqued node and remap the operands.
+ TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone();
+ remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
+ if (Optional<Metadata *> MappedOp = getMappedOp(Old))
+ return *MappedOp;
+ (void)D;
+ assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
+ return &G.getFwdReference(*cast<MDNode>(Old));
+ });
+
+ auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN));
+ M.mapToMetadata(N, NewN);
+
+ // Nodes that were referenced out of order in the POT are involved in a
+ // uniquing cycle.
+ if (HadPlaceholder)
+ CyclicNodes.push_back(NewN);
+ }
+
+ // Resolve cycles.
+ for (auto *N : CyclicNodes)
+ if (!N->isResolved())
+ N->resolveCycles();
+}
+
+Metadata *MDNodeMapper::map(const MDNode &N) {
+ assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive");
+ assert(!(M.Flags & RF_NoModuleLevelChanges) &&
+ "MDNodeMapper::map assumes module-level changes");
+
+ // Require resolved nodes whenever metadata might be remapped.
+ assert(N.isResolved() && "Unexpected unresolved node");
+
+ Metadata *MappedN =
+ N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N);
+ while (!DistinctWorklist.empty())
+ remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) {
+ if (Optional<Metadata *> MappedOp = tryToMapOperand(Old))
+ return *MappedOp;
+ return mapTopLevelUniquedNode(*cast<MDNode>(Old));
+ });
+ return MappedN;
+}
+
+Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
+ assert(FirstN.isUniqued() && "Expected uniqued node");
+
+ // Create a post-order traversal of uniqued nodes under FirstN.
+ UniquedGraph G;
+ if (!createPOT(G, FirstN)) {
+ // Return early if no nodes have changed.
+ for (const MDNode *N : G.POT)
+ M.mapToSelf(N);
+ return &const_cast<MDNode &>(FirstN);
+ }
+
+ // Update graph with all nodes that have changed.
+ G.propagateChanges();
+
+ // Map all the nodes in the graph.
+ mapNodesInPOT(G);
+
+ // Return the original node, remapped.
+ return *getMappedOp(&FirstN);
+}
+
+namespace {
+
+struct MapMetadataDisabler {
+ ValueToValueMapTy &VM;
+
+ MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
+ VM.disableMapMetadata();
+ }
+ ~MapMetadataDisabler() { VM.enableMapMetadata(); }
+};
+
+} // end namespace
+
+Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
+ // If the value already exists in the map, use it.
+ if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
+ return *NewMD;
+
+ if (isa<MDString>(MD))
+ return const_cast<Metadata *>(MD);
+
+  // This is module-level metadata. If nothing at the module level is
+ // changing, use an identity mapping.
+ if ((Flags & RF_NoModuleLevelChanges))
+ return const_cast<Metadata *>(MD);
+
+ if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
+ // Disallow recursion into metadata mapping through mapValue.
+ MapMetadataDisabler MMD(getVM());
+
+ // Don't memoize ConstantAsMetadata. Instead of lasting until the
+    // LLVMContext is destroyed, such nodes can be deleted when the GlobalValue
+    // they reference is destroyed. These aren't super common, so the extra
+ // indirection isn't that expensive.
+ return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
+ }
+
+ assert(isa<MDNode>(MD) && "Expected a metadata node");
+
+ return None;
+}
+
+Metadata *Mapper::mapMetadata(const Metadata *MD) {
+ assert(MD && "Expected valid metadata");
+ assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata");
+
+ if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD))
+ return *NewMD;
+
+ return MDNodeMapper(*this).map(*cast<MDNode>(MD));
+}
+
+void Mapper::flush() {
+ // Flush out the worklist of global values.
+ while (!Worklist.empty()) {
+ WorklistEntry E = Worklist.pop_back_val();
+ CurrentMCID = E.MCID;
+ switch (E.Kind) {
+ case WorklistEntry::MapGlobalInit:
+ E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init));
+ remapGlobalObjectMetadata(*E.Data.GVInit.GV);
+ break;
+ case WorklistEntry::MapAppendingVar: {
+ unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
+ mapAppendingVariable(*E.Data.AppendingGV.GV,
+ E.Data.AppendingGV.InitPrefix,
+ E.AppendingGVIsOldCtorDtor,
+ makeArrayRef(AppendingInits).slice(PrefixSize));
+ AppendingInits.resize(PrefixSize);
+ break;
+ }
+ case WorklistEntry::MapGlobalAliasee:
+ E.Data.GlobalAliasee.GA->setAliasee(
+ mapConstant(E.Data.GlobalAliasee.Aliasee));
+ break;
+ case WorklistEntry::RemapFunction:
+ remapFunction(*E.Data.RemapF);
+ break;
+ }
+ }
+ CurrentMCID = 0;
+
+ // Finish logic for block addresses now that all global values have been
+ // handled.
+ while (!DelayedBBs.empty()) {
+ DelayedBasicBlock DBB = DelayedBBs.pop_back_val();
+ BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB));
+ DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB);
+ }
+}
+
+void Mapper::remapInstruction(Instruction *I) {
+ // Remap operands.
+ for (Use &Op : I->operands()) {
+ Value *V = mapValue(Op);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V)
+ Op = V;
+ else
+ assert((Flags & RF_IgnoreMissingLocals) &&
+ "Referenced value not in value map!");
+ }
+
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = mapValue(PN->getIncomingBlock(i));
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingLocals) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (const auto &MI : MDs) {
+ MDNode *Old = MI.second;
+ MDNode *New = cast_or_null<MDNode>(mapMetadata(Old));
+ if (New != Old)
+ I->setMetadata(MI.first, New);
+ }
+
+ if (!TypeMapper)
+ return;
+
+ // If the instruction's type is being remapped, do so now.
+ if (auto CS = CallSite(I)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *FTy = CS.getFunctionType();
+ Tys.reserve(FTy->getNumParams());
+ for (Type *Ty : FTy->params())
+ Tys.push_back(TypeMapper->remapType(Ty));
+ CS.mutateFunctionType(FunctionType::get(
+ TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+ return;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType()));
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ GEP->setSourceElementType(
+ TypeMapper->remapType(GEP->getSourceElementType()));
+ GEP->setResultElementType(
+ TypeMapper->remapType(GEP->getResultElementType()));
+ }
+ I->mutateType(TypeMapper->remapType(I->getType()));
+}
+
+void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ GO.getAllMetadata(MDs);
+ GO.clearMetadata();
+ for (const auto &I : MDs)
+ GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second)));
+}
+
+void Mapper::remapFunction(Function &F) {
+ // Remap the operands.
+ for (Use &Op : F.operands())
+ if (Op)
+ Op = mapValue(Op);
+
+ // Remap the metadata attachments.
+ remapGlobalObjectMetadata(F);
+
+ // Remap the argument types.
+ if (TypeMapper)
+ for (Argument &A : F.args())
+ A.mutateType(TypeMapper->remapType(A.getType()));
+
+ // Remap the instructions.
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ remapInstruction(&I);
+}
+
+void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers) {
+ SmallVector<Constant *, 16> Elements;
+ if (InitPrefix) {
+ unsigned NumElements =
+ cast<ArrayType>(InitPrefix->getType())->getNumElements();
+ for (unsigned I = 0; I != NumElements; ++I)
+ Elements.push_back(InitPrefix->getAggregateElement(I));
+ }
+
+ PointerType *VoidPtrTy;
+ Type *EltTy;
+ if (IsOldCtorDtor) {
+ // FIXME: This upgrade is done during linking to support the C API. See
+ // also IRLinker::linkAppendingVarProto() in IRMover.cpp.
+ VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
+ auto &ST = *cast<StructType>(NewMembers.front()->getType());
+ Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+ EltTy = StructType::get(GV.getContext(), Tys, false);
+ }
+
+ for (auto *V : NewMembers) {
+ Constant *NewV;
+ if (IsOldCtorDtor) {
+ auto *S = cast<ConstantStruct>(V);
+ auto *E1 = cast<Constant>(mapValue(S->getOperand(0)));
+ auto *E2 = cast<Constant>(mapValue(S->getOperand(1)));
+ Constant *Null = Constant::getNullValue(VoidPtrTy);
+ NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null);
+ } else {
+ NewV = cast_or_null<Constant>(mapValue(V));
+ }
+ Elements.push_back(NewV);
+ }
+
+ GV.setInitializer(ConstantArray::get(
+ cast<ArrayType>(GV.getType()->getElementType()), Elements));
+}
+
+void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapGlobalInit;
+ WE.MCID = MCID;
+ WE.Data.GVInit.GV = &GV;
+ WE.Data.GVInit.Init = &Init;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapAppendingVar;
+ WE.MCID = MCID;
+ WE.Data.AppendingGV.GV = &GV;
+ WE.Data.AppendingGV.InitPrefix = InitPrefix;
+ WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor;
+ WE.AppendingGVNumNewMembers = NewMembers.size();
+ Worklist.push_back(WE);
+ AppendingInits.append(NewMembers.begin(), NewMembers.end());
+}
+
+void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::MapGlobalAliasee;
+ WE.MCID = MCID;
+ WE.Data.GlobalAliasee.GA = &GA;
+ WE.Data.GlobalAliasee.Aliasee = &Aliasee;
+ Worklist.push_back(WE);
+}
+
+void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ assert(AlreadyScheduled.insert(&F).second && "Should not reschedule");
+ assert(MCID < MCs.size() && "Invalid mapping context");
+
+ WorklistEntry WE;
+ WE.Kind = WorklistEntry::RemapFunction;
+ WE.MCID = MCID;
+ WE.Data.RemapF = &F;
+ Worklist.push_back(WE);
+}
+
+void Mapper::addFlags(RemapFlags Flags) {
+ assert(!hasWorkToDo() && "Expected to have flushed the worklist");
+ this->Flags = this->Flags | Flags;
+}
+
+static Mapper *getAsMapper(void *pImpl) {
+ return reinterpret_cast<Mapper *>(pImpl);
+}
+
+namespace {
+
+class FlushingMapper {
+ Mapper &M;
+
+public:
+ explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
+ assert(!M.hasWorkToDo() && "Expected to be flushed");
+ }
+ ~FlushingMapper() { M.flush(); }
+ Mapper *operator->() const { return &M; }
+};
+
+} // end namespace
+
+ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer)
+ : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
+
+ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
+
+unsigned
+ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM,
+ ValueMaterializer *Materializer) {
+ return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer);
+}
+
+void ValueMapper::addFlags(RemapFlags Flags) {
+ FlushingMapper(pImpl)->addFlags(Flags);
+}
+
+Value *ValueMapper::mapValue(const Value &V) {
+ return FlushingMapper(pImpl)->mapValue(&V);
+}
+
+Constant *ValueMapper::mapConstant(const Constant &C) {
+ return cast_or_null<Constant>(mapValue(C));
+}
+
+Metadata *ValueMapper::mapMetadata(const Metadata &MD) {
+ return FlushingMapper(pImpl)->mapMetadata(&MD);
+}
+
+MDNode *ValueMapper::mapMDNode(const MDNode &N) {
+ return cast_or_null<MDNode>(mapMetadata(N));
+}
+
+void ValueMapper::remapInstruction(Instruction &I) {
+ FlushingMapper(pImpl)->remapInstruction(&I);
+}
+
+void ValueMapper::remapFunction(Function &F) {
+ FlushingMapper(pImpl)->remapFunction(F);
+}
+
+void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
+ Constant &Init,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID);
+}
+
+void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
+ Constant *InitPrefix,
+ bool IsOldCtorDtor,
+ ArrayRef<Constant *> NewMembers,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAppendingVariable(
+ GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
+ getAsMapper(pImpl)->scheduleRemapFunction(F, MCID);
+}
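+
+// A minimal usage sketch of the public API (illustrative only; OldV, NewV,
+// and F are hypothetical, not defined in this file):
+//
+//   ValueToValueMapTy VM;
+//   VM[OldV] = NewV;                        // seed known mappings
+//   ValueMapper Mapper(VM, RF_IgnoreMissingLocals);
+//   for (BasicBlock &BB : F)
+//     for (Instruction &I : BB)
+//       Mapper.remapInstruction(I);         // rewrites operands through VM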