aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp152
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp276
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp934
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp473
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp1340
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp474
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp461
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp863
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp202
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp1567
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp159
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp153
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp171
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp94
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Evaluator.cpp731
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp491
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp948
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp313
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp194
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp63
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp202
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp2417
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp62
-rw-r--r--contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp673
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp497
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp561
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp2956
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp690
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp920
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp978
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp820
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp744
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp958
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp976
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp326
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp451
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp618
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp115
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp177
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp282
-rw-r--r--contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp120
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp852
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp1007
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp495
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp190
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp107
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp6081
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp957
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp3159
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SplitModule.cpp284
-rw-r--r--contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp75
-rw-r--r--contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp584
-rw-r--r--contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp59
-rw-r--r--contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp539
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp1157
60 files changed, 0 insertions, 41540 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
deleted file mode 100644
index 01912297324a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-
-namespace llvm {
-
-// We sort the stack variables by alignment (largest first) to minimize
-// unnecessary large gaps due to alignment.
-// It is tempting to also sort variables by size so that larger variables
-// have larger redzones at both ends. But reordering will make report analysis
-// harder, especially when temporary unnamed variables are present.
-// So, until we can provide more information (type, line number, etc)
-// for the stack variables we avoid reordering them too much.
-static inline bool CompareVars(const ASanStackVariableDescription &a,
- const ASanStackVariableDescription &b) {
- return a.Alignment > b.Alignment;
-}
-
-// We also force minimal alignment for all vars to kMinAlignment so that vars
-// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
-static const size_t kMinAlignment = 16;
-
-// We want to add a full redzone after every variable.
-// The larger the variable Size the larger is the redzone.
-// The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
- size_t Alignment) {
- size_t Res = 0;
- if (Size <= 4) Res = 16;
- else if (Size <= 16) Res = 32;
- else if (Size <= 128) Res = Size + 32;
- else if (Size <= 512) Res = Size + 64;
- else if (Size <= 4096) Res = Size + 128;
- else Res = Size + 256;
- return alignTo(std::max(Res, 2 * Granularity), Alignment);
-}
-
-ASanStackFrameLayout
-ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
- size_t Granularity, size_t MinHeaderSize) {
- assert(Granularity >= 8 && Granularity <= 64 &&
- (Granularity & (Granularity - 1)) == 0);
- assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
- MinHeaderSize >= Granularity);
- const size_t NumVars = Vars.size();
- assert(NumVars > 0);
- for (size_t i = 0; i < NumVars; i++)
- Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
-
- llvm::stable_sort(Vars, CompareVars);
-
- ASanStackFrameLayout Layout;
- Layout.Granularity = Granularity;
- Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
- size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
- Vars[0].Alignment);
- assert((Offset % Granularity) == 0);
- for (size_t i = 0; i < NumVars; i++) {
- bool IsLast = i == NumVars - 1;
- size_t Alignment = std::max(Granularity, Vars[i].Alignment);
- (void)Alignment; // Used only in asserts.
- size_t Size = Vars[i].Size;
- assert((Alignment & (Alignment - 1)) == 0);
- assert(Layout.FrameAlignment >= Alignment);
- assert((Offset % Alignment) == 0);
- assert(Size > 0);
- size_t NextAlignment = IsLast ? Granularity
- : std::max(Granularity, Vars[i + 1].Alignment);
- size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
- NextAlignment);
- Vars[i].Offset = Offset;
- Offset += SizeWithRedzone;
- }
- if (Offset % MinHeaderSize) {
- Offset += MinHeaderSize - (Offset % MinHeaderSize);
- }
- Layout.FrameSize = Offset;
- assert((Layout.FrameSize % MinHeaderSize) == 0);
- return Layout;
-}
-
-SmallString<64> ComputeASanStackFrameDescription(
- const SmallVectorImpl<ASanStackVariableDescription> &Vars) {
- SmallString<2048> StackDescriptionStorage;
- raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << Vars.size();
-
- for (const auto &Var : Vars) {
- std::string Name = Var.Name;
- if (Var.Line) {
- Name += ":";
- Name += to_string(Var.Line);
- }
- StackDescription << " " << Var.Offset << " " << Var.Size << " "
- << Name.size() << " " << Name;
- }
- return StackDescription.str();
-}
-
-SmallVector<uint8_t, 64>
-GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
- const ASanStackFrameLayout &Layout) {
- assert(Vars.size() > 0);
- SmallVector<uint8_t, 64> SB;
- SB.clear();
- const size_t Granularity = Layout.Granularity;
- SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
- for (const auto &Var : Vars) {
- SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
-
- SB.resize(SB.size() + Var.Size / Granularity, 0);
- if (Var.Size % Granularity)
- SB.push_back(Var.Size % Granularity);
- }
- SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic);
- return SB;
-}
-
-SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
- const SmallVectorImpl<ASanStackVariableDescription> &Vars,
- const ASanStackFrameLayout &Layout) {
- SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
- const size_t Granularity = Layout.Granularity;
-
- for (const auto &Var : Vars) {
- assert(Var.LifetimeSize <= Var.Size);
- const size_t LifetimeShadowSize =
- (Var.LifetimeSize + Granularity - 1) / Granularity;
- const size_t Offset = Var.Offset / Granularity;
- std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
- kAsanStackUseAfterScopeMagic);
- }
-
- return SB;
-}
-
-} // llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
deleted file mode 100644
index ee0973002c47..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file adds DWARF discriminators to the IR. Path discriminators are
-// used to decide what CFG path was taken inside sub-graphs whose instructions
-// share the same line and column number information.
-//
-// The main user of this is the sample profiler. Instruction samples are
-// mapped to line number information. Since a single line may be spread
-// out over several basic blocks, discriminators add more precise location
-// for the samples.
-//
-// For example,
-//
-// 1 #define ASSERT(P)
-// 2 if (!(P))
-// 3 abort()
-// ...
-// 100 while (true) {
-// 101 ASSERT (sum < 0);
-// 102 ...
-// 130 }
-//
-// when converted to IR, this snippet looks something like:
-//
-// while.body: ; preds = %entry, %if.end
-// %0 = load i32* %sum, align 4, !dbg !15
-// %cmp = icmp slt i32 %0, 0, !dbg !15
-// br i1 %cmp, label %if.end, label %if.then, !dbg !15
-//
-// if.then: ; preds = %while.body
-// call void @abort(), !dbg !15
-// br label %if.end, !dbg !15
-//
-// Notice that all the instructions in blocks 'while.body' and 'if.then'
-// have exactly the same debug information. When this program is sampled
-// at runtime, the profiler will assume that all these instructions are
-// equally frequent. This, in turn, will consider the edge while.body->if.then
-// to be frequently taken (which is incorrect).
-//
-// By adding a discriminator value to the instructions in block 'if.then',
-// we can distinguish instructions at line 101 with discriminator 0 from
-// the instructions at line 101 with discriminator 1.
-//
-// For more details about DWARF discriminators, please visit
-// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/AddDiscriminators.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "add-discriminators"
-
-// Command line option to disable discriminator generation even in the
-// presence of debug information. This is only needed when debugging
-// debug info generation issues.
-static cl::opt<bool> NoDiscriminators(
- "no-discriminators", cl::init(false),
- cl::desc("Disable generation of discriminator information."));
-
-namespace {
-
-// The legacy pass of AddDiscriminators.
-struct AddDiscriminatorsLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
-
- AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
- initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-
-} // end anonymous namespace
-
-char AddDiscriminatorsLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
- "Add DWARF path discriminators", false, false)
-INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
- "Add DWARF path discriminators", false, false)
-
-// Create the legacy AddDiscriminatorsPass.
-FunctionPass *llvm::createAddDiscriminatorsPass() {
- return new AddDiscriminatorsLegacyPass();
-}
-
-static bool shouldHaveDiscriminator(const Instruction *I) {
- return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
-}
-
-/// Assign DWARF discriminators.
-///
-/// To assign discriminators, we examine the boundaries of every
-/// basic block and its successors. Suppose there is a basic block B1
-/// with successor B2. The last instruction I1 in B1 and the first
-/// instruction I2 in B2 are located at the same file and line number.
-/// This situation is illustrated in the following code snippet:
-///
-/// if (i < 10) x = i;
-///
-/// entry:
-/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
-/// if.then:
-/// %1 = load i32* %i.addr, align 4, !dbg !10
-/// store i32 %1, i32* %x, align 4, !dbg !10
-/// br label %if.end, !dbg !10
-/// if.end:
-/// ret void, !dbg !12
-///
-/// Notice how the branch instruction in block 'entry' and all the
-/// instructions in block 'if.then' have the exact same debug location
-/// information (!dbg !10).
-///
-/// To distinguish instructions in block 'entry' from instructions in
-/// block 'if.then', we generate a new lexical block for all the
-/// instruction in block 'if.then' that share the same file and line
-/// location with the last instruction of block 'entry'.
-///
-/// This new lexical block will have the same location information as
-/// the previous one, but with a new DWARF discriminator value.
-///
-/// One of the main uses of this discriminator value is in runtime
-/// sample profilers. It allows the profiler to distinguish instructions
-/// at location !dbg !10 that execute on different basic blocks. This is
-/// important because while the predicate 'if (x < 10)' may have been
-/// executed millions of times, the assignment 'x = i' may have only
-/// executed a handful of times (meaning that the entry->if.then edge is
-/// seldom taken).
-///
-/// If we did not have discriminator information, the profiler would
-/// assign the same weight to both blocks 'entry' and 'if.then', which
-/// in turn will make it conclude that the entry->if.then edge is very
-/// hot.
-///
-/// To decide where to create new discriminator values, this function
-/// traverses the CFG and examines instruction at basic block boundaries.
-/// If the last instruction I1 of a block B1 is at the same file and line
-/// location as instruction I2 of successor B2, then it creates a new
-/// lexical block for I2 and all the instruction in B2 that share the same
-/// file and line location as I2. This new lexical block will have a
-/// different discriminator number than I1.
-static bool addDiscriminators(Function &F) {
- // If the function has debug information, but the user has disabled
- // discriminators, do nothing.
- // Simlarly, if the function has no debug info, do nothing.
- if (NoDiscriminators || !F.getSubprogram())
- return false;
-
- bool Changed = false;
-
- using Location = std::pair<StringRef, unsigned>;
- using BBSet = DenseSet<const BasicBlock *>;
- using LocationBBMap = DenseMap<Location, BBSet>;
- using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
- using LocationSet = DenseSet<Location>;
-
- LocationBBMap LBM;
- LocationDiscriminatorMap LDM;
-
- // Traverse all instructions in the function. If the source line location
- // of the instruction appears in other basic block, assign a new
- // discriminator for this instruction.
- for (BasicBlock &B : F) {
- for (auto &I : B.getInstList()) {
- // Not all intrinsic calls should have a discriminator.
- // We want to avoid a non-deterministic assignment of discriminators at
- // different debug levels. We still allow discriminators on memory
- // intrinsic calls because those can be early expanded by SROA into
- // pairs of loads and stores, and the expanded load/store instructions
- // should have a valid discriminator.
- if (!shouldHaveDiscriminator(&I))
- continue;
- const DILocation *DIL = I.getDebugLoc();
- if (!DIL)
- continue;
- Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
- auto &BBMap = LBM[L];
- auto R = BBMap.insert(&B);
- if (BBMap.size() == 1)
- continue;
- // If we could insert more than one block with the same line+file, a
- // discriminator is needed to distinguish both instructions.
- // Only the lowest 7 bits are used to represent a discriminator to fit
- // it in 1 byte ULEB128 representation.
- unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
- if (!NewDIL) {
- LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
- << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " "
- << I << "\n");
- } else {
- I.setDebugLoc(NewDIL.getValue());
- LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " " << I
- << "\n");
- }
- Changed = true;
- }
- }
-
- // Traverse all instructions and assign new discriminators to call
- // instructions with the same lineno that are in the same basic block.
- // Sample base profile needs to distinguish different function calls within
- // a same source line for correct profile annotation.
- for (BasicBlock &B : F) {
- LocationSet CallLocations;
- for (auto &I : B.getInstList()) {
- // We bypass intrinsic calls for the following two reasons:
- // 1) We want to avoid a non-deterministic assigment of
- // discriminators.
- // 2) We want to minimize the number of base discriminators used.
- if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
- continue;
-
- DILocation *CurrentDIL = I.getDebugLoc();
- if (!CurrentDIL)
- continue;
- Location L =
- std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
- if (!CallLocations.insert(L).second) {
- unsigned Discriminator = ++LDM[L];
- auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
- if (!NewDIL) {
- LLVM_DEBUG(dbgs()
- << "Could not encode discriminator: "
- << CurrentDIL->getFilename() << ":"
- << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
- << ":" << Discriminator << " " << I << "\n");
- } else {
- I.setDebugLoc(NewDIL.getValue());
- Changed = true;
- }
- }
- }
- }
- return Changed;
-}
-
-bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
- return addDiscriminators(F);
-}
-
-PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- if (!addDiscriminators(F))
- return PreservedAnalyses::all();
-
- // FIXME: should be all()
- return PreservedAnalyses::none();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
deleted file mode 100644
index 5fa371377c85..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ /dev/null
@@ -1,934 +0,0 @@
-//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of functions perform manipulations on basic blocks, and
-// instructions contained within basic blocks.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <cassert>
-#include <cstdint>
-#include <string>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "basicblock-utils"
-
-void llvm::DetatchDeadBlocks(
- ArrayRef<BasicBlock *> BBs,
- SmallVectorImpl<DominatorTree::UpdateType> *Updates,
- bool KeepOneInputPHIs) {
- for (auto *BB : BBs) {
- // Loop through all of our successors and make sure they know that one
- // of their predecessors is going away.
- SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
- for (BasicBlock *Succ : successors(BB)) {
- Succ->removePredecessor(BB, KeepOneInputPHIs);
- if (Updates && UniqueSuccessors.insert(Succ).second)
- Updates->push_back({DominatorTree::Delete, BB, Succ});
- }
-
- // Zap all the instructions in the block.
- while (!BB->empty()) {
- Instruction &I = BB->back();
- // If this instruction is used, replace uses with an arbitrary value.
- // Because control flow can't get here, we don't care what we replace the
- // value with. Note that since this block is unreachable, and all values
- // contained within it must dominate their uses, that all uses will
- // eventually be removed (they are themselves dead).
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- BB->getInstList().pop_back();
- }
- new UnreachableInst(BB->getContext(), BB);
- assert(BB->getInstList().size() == 1 &&
- isa<UnreachableInst>(BB->getTerminator()) &&
- "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
- }
-}
-
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
- bool KeepOneInputPHIs) {
- DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
-}
-
-void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
- bool KeepOneInputPHIs) {
-#ifndef NDEBUG
- // Make sure that all predecessors of each dead block is also dead.
- SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
- assert(Dead.size() == BBs.size() && "Duplicating blocks?");
- for (auto *BB : Dead)
- for (BasicBlock *Pred : predecessors(BB))
- assert(Dead.count(Pred) && "All predecessors must be dead!");
-#endif
-
- SmallVector<DominatorTree::UpdateType, 4> Updates;
- DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
-
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
-
- for (BasicBlock *BB : BBs)
- if (DTU)
- DTU->deleteBB(BB);
- else
- BB->eraseFromParent();
-}
-
-bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
- bool KeepOneInputPHIs) {
- df_iterator_default_set<BasicBlock*> Reachable;
-
- // Mark all reachable blocks.
- for (BasicBlock *BB : depth_first_ext(&F, Reachable))
- (void)BB/* Mark all reachable blocks */;
-
- // Collect all dead blocks.
- std::vector<BasicBlock*> DeadBlocks;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(&*I)) {
- BasicBlock *BB = &*I;
- DeadBlocks.push_back(BB);
- }
-
- // Delete the dead blocks.
- DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
-
- return !DeadBlocks.empty();
-}
-
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
- MemoryDependenceResults *MemDep) {
- if (!isa<PHINode>(BB->begin())) return;
-
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
- if (PN->getIncomingValue(0) != PN)
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- else
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-
- if (MemDep)
- MemDep->removeInstruction(PN); // Memdep updates AA itself.
-
- PN->eraseFromParent();
- }
-}
-
-bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
- // Recursively deleting a PHI may cause multiple PHIs to be deleted
- // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
- SmallVector<WeakTrackingVH, 8> PHIs;
- for (PHINode &PN : BB->phis())
- PHIs.push_back(&PN);
-
- bool Changed = false;
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
- Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
-
- return Changed;
-}
-
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
- LoopInfo *LI, MemorySSAUpdater *MSSAU,
- MemoryDependenceResults *MemDep) {
- if (BB->hasAddressTaken())
- return false;
-
- // Can't merge if there are multiple predecessors, or no predecessors.
- BasicBlock *PredBB = BB->getUniquePredecessor();
- if (!PredBB) return false;
-
- // Don't break self-loops.
- if (PredBB == BB) return false;
- // Don't break unwinding instructions.
- if (PredBB->getTerminator()->isExceptionalTerminator())
- return false;
-
- // Can't merge if there are multiple distinct successors.
- if (PredBB->getUniqueSuccessor() != BB)
- return false;
-
- // Can't merge if there is PHI loop.
- for (PHINode &PN : BB->phis())
- for (Value *IncValue : PN.incoming_values())
- if (IncValue == &PN)
- return false;
-
- LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
- << PredBB->getName() << "\n");
-
- // Begin by getting rid of unneeded PHIs.
- SmallVector<AssertingVH<Value>, 4> IncomingValues;
- if (isa<PHINode>(BB->front())) {
- for (PHINode &PN : BB->phis())
- if (!isa<PHINode>(PN.getIncomingValue(0)) ||
- cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB)
- IncomingValues.push_back(PN.getIncomingValue(0));
- FoldSingleEntryPHINodes(BB, MemDep);
- }
-
- // DTU update: Collect all the edges that exit BB.
- // These dominator edges will be redirected from Pred.
- std::vector<DominatorTree::UpdateType> Updates;
- if (DTU) {
- Updates.reserve(1 + (2 * succ_size(BB)));
- // Add insert edges first. Experimentally, for the particular case of two
- // blocks that can be merged, with a single successor and single predecessor
- // respectively, it is beneficial to have all insert updates first. Deleting
- // edges first may lead to unreachable blocks, followed by inserting edges
- // making the blocks reachable again. Such DT updates lead to high compile
- // times. We add inserts before deletes here to reduce compile time.
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- // This successor of BB may already have PredBB as a predecessor.
- if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
- Updates.push_back({DominatorTree::Insert, PredBB, *I});
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- Updates.push_back({DominatorTree::Delete, BB, *I});
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
- }
-
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
-
- // Delete the unconditional branch from the predecessor...
- PredBB->getInstList().pop_back();
-
- // Make all PHI nodes that referred to BB now refer to Pred as their
- // source...
- BB->replaceAllUsesWith(PredBB);
-
- // Move all definitions in the successor to the predecessor...
- PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
- new UnreachableInst(BB->getContext(), BB);
-
- // Eliminate duplicate dbg.values describing the entry PHI node post-splice.
- for (auto Incoming : IncomingValues) {
- if (isa<Instruction>(*Incoming)) {
- SmallVector<DbgValueInst *, 2> DbgValues;
- SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>
- DbgValueSet;
- llvm::findDbgValues(DbgValues, Incoming);
- for (auto &DVI : DbgValues) {
- auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()});
- if (!R.second)
- DVI->eraseFromParent();
- }
- }
- }
-
- // Inherit predecessors name if it exists.
- if (!PredBB->hasName())
- PredBB->takeName(BB);
-
- if (LI)
- LI->removeBlock(BB);
-
- if (MemDep)
- MemDep->invalidateCachedPredecessors();
-
- // Finally, erase the old block and update dominator info.
- if (DTU) {
- assert(BB->getInstList().size() == 1 &&
- isa<UnreachableInst>(BB->getTerminator()) &&
- "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
- DTU->applyUpdatesPermissive(Updates);
- DTU->deleteBB(BB);
- }
-
- else {
- BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
- }
- return true;
-}
-
-void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
- BasicBlock::iterator &BI, Value *V) {
- Instruction &I = *BI;
- // Replaces all of the uses of the instruction with uses of the value
- I.replaceAllUsesWith(V);
-
- // Make sure to propagate a name if there is one already.
- if (I.hasName() && !V->hasName())
- V->takeName(&I);
-
- // Delete the unnecessary instruction now...
- BI = BIL.erase(BI);
-}
-
-void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
- BasicBlock::iterator &BI, Instruction *I) {
- assert(I->getParent() == nullptr &&
- "ReplaceInstWithInst: Instruction already inserted into basic block!");
-
- // Copy debug location to newly added instruction, if it wasn't already set
- // by the caller.
- if (!I->getDebugLoc())
- I->setDebugLoc(BI->getDebugLoc());
-
- // Insert the new instruction into the basic block...
- BasicBlock::iterator New = BIL.insert(BI, I);
-
- // Replace all uses of the old instruction, and delete it.
- ReplaceInstWithValue(BIL, BI, I);
-
- // Move BI back to point to the newly inserted instruction
- BI = New;
-}
-
-void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
- BasicBlock::iterator BI(From);
- ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
-}
-
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU) {
- unsigned SuccNum = GetSuccessorNumber(BB, Succ);
-
- // If this is a critical edge, let SplitCriticalEdge do it.
- Instruction *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(
- LatchTerm, SuccNum,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
- return LatchTerm->getSuccessor(SuccNum);
-
- // If the edge isn't critical, then BB has a single successor or Succ has a
- // single pred. Split the block.
- if (BasicBlock *SP = Succ->getSinglePredecessor()) {
- // If the successor only has a single pred, split the top of the successor
- // block.
- assert(SP == BB && "CFG broken");
- SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
- }
-
- // Otherwise, if BB has a single successor, split it at the bottom of the
- // block.
- assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
-}
-
-unsigned
-llvm::SplitAllCriticalEdges(Function &F,
- const CriticalEdgeSplittingOptions &Options) {
- unsigned NumBroken = 0;
- for (BasicBlock &BB : F) {
- Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (SplitCriticalEdge(TI, i, Options))
- ++NumBroken;
- }
- return NumBroken;
-}
-
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU) {
- BasicBlock::iterator SplitIt = SplitPt->getIterator();
- while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
- ++SplitIt;
- BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
-
- // The new block lives in whichever loop the old one did. This preserves
- // LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LI)
- if (Loop *L = LI->getLoopFor(Old))
- L->addBasicBlockToLoop(New, *LI);
-
- if (DT)
- // Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT->getNode(Old)) {
- std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
-
- DomTreeNode *NewNode = DT->addNewBlock(New, Old);
- for (DomTreeNode *I : Children)
- DT->changeImmediateDominator(I, NewNode);
- }
-
- // Move MemoryAccesses still tracked in Old, but part of New now.
- // Update accesses in successor blocks accordingly.
- if (MSSAU)
- MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin()));
-
- return New;
-}
-
-/// Update DominatorTree, LoopInfo, and LCCSA analysis information.
-static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
- ArrayRef<BasicBlock *> Preds,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA, bool &HasLoopExit) {
- // Update dominator tree if available.
- if (DT) {
- if (OldBB == DT->getRootNode()->getBlock()) {
- assert(NewBB == &NewBB->getParent()->getEntryBlock());
- DT->setNewRoot(NewBB);
- } else {
- // Split block expects NewBB to have a non-empty set of predecessors.
- DT->splitBlock(NewBB);
- }
- }
-
- // Update MemoryPhis after split if MemorySSA is available
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds);
-
- // The rest of the logic is only relevant for updating the loop structures.
- if (!LI)
- return;
-
- assert(DT && "DT should be available to update LoopInfo!");
- Loop *L = LI->getLoopFor(OldBB);
-
- // If we need to preserve loop analyses, collect some information about how
- // this split will affect loops.
- bool IsLoopEntry = !!L;
- bool SplitMakesNewLoopHeader = false;
- for (BasicBlock *Pred : Preds) {
- // Preds that are not reachable from entry should not be used to identify if
- // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks
- // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader
- // as true and make the NewBB the header of some loop. This breaks LI.
- if (!DT->isReachableFromEntry(Pred))
- continue;
- // If we need to preserve LCSSA, determine if any of the preds is a loop
- // exit.
- if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Pred))
- if (!PL->contains(OldBB))
- HasLoopExit = true;
-
- // If we need to preserve LoopInfo, note whether any of the preds crosses
- // an interesting loop boundary.
- if (!L)
- continue;
- if (L->contains(Pred))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
-
- // Unless we have a loop for OldBB, nothing else to do here.
- if (!L)
- return;
-
- if (IsLoopEntry) {
- // Add the new block to the nearest enclosing loop (and not an adjacent
- // loop). To find this, examine each of the predecessors and determine which
- // loops enclose them, and select the most-nested loop which contains the
- // loop containing the block being split.
- Loop *InnermostPredLoop = nullptr;
- for (BasicBlock *Pred : Preds) {
- if (Loop *PredLoop = LI->getLoopFor(Pred)) {
- // Seek a loop which actually contains the block being split (to avoid
- // adjacent loops).
- while (PredLoop && !PredLoop->contains(OldBB))
- PredLoop = PredLoop->getParentLoop();
-
- // Select the most-nested of these loops which contains the block.
- if (PredLoop && PredLoop->contains(OldBB) &&
- (!InnermostPredLoop ||
- InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
- InnermostPredLoop = PredLoop;
- }
- }
-
- if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
- } else {
- L->addBasicBlockToLoop(NewBB, *LI);
- if (SplitMakesNewLoopHeader)
- L->moveToHeader(NewBB);
- }
-}
-
-/// Update the PHI nodes in OrigBB to include the values coming from NewBB.
-/// This also updates AliasAnalysis, if available.
-static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
- ArrayRef<BasicBlock *> Preds, BranchInst *BI,
- bool HasLoopExit) {
- // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
- for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
- PHINode *PN = cast<PHINode>(I++);
-
- // Check to see if all of the values coming in are the same. If so, we
- // don't need to create a new PHI node, unless it's needed for LCSSA.
- Value *InVal = nullptr;
- if (!HasLoopExit) {
- InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (!PredSet.count(PN->getIncomingBlock(i)))
- continue;
- if (!InVal)
- InVal = PN->getIncomingValue(i);
- else if (InVal != PN->getIncomingValue(i)) {
- InVal = nullptr;
- break;
- }
- }
- }
-
- if (InVal) {
- // If all incoming values for the new PHI would be the same, just don't
- // make a new PHI. Instead, just remove the incoming values from the old
- // PHI.
-
- // NOTE! This loop walks backwards for a reason! First off, this minimizes
- // the cost of removal if we end up removing a large number of values, and
- // second off, this ensures that the indices for the incoming values
- // aren't invalidated when we remove one.
- for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
- if (PredSet.count(PN->getIncomingBlock(i)))
- PN->removeIncomingValue(i, false);
-
- // Add an incoming value to the PHI node in the loop for the preheader
- // edge.
- PN->addIncoming(InVal, NewBB);
- continue;
- }
-
- // If the values coming into the block are not the same, we need a new
- // PHI.
- // Create the new PHI node, insert it into NewBB at the end of the block
- PHINode *NewPHI =
- PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
-
- // NOTE! This loop walks backwards for a reason! First off, this minimizes
- // the cost of removal if we end up removing a large number of values, and
- // second off, this ensures that the indices for the incoming values aren't
- // invalidated when we remove one.
- for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
- BasicBlock *IncomingBB = PN->getIncomingBlock(i);
- if (PredSet.count(IncomingBB)) {
- Value *V = PN->removeIncomingValue(i, false);
- NewPHI->addIncoming(V, IncomingBB);
- }
- }
-
- PN->addIncoming(NewPHI, NewBB);
- }
-}
-
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- // Do not attempt to split that which cannot be split.
- if (!BB->canSplitPredecessors())
- return nullptr;
-
- // For the landingpads we need to act a bit differently.
- // Delegate this work to the SplitLandingPadPredecessors.
- if (BB->isLandingPad()) {
- SmallVector<BasicBlock*, 2> NewBBs;
- std::string NewName = std::string(Suffix) + ".split-lp";
-
- SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, MSSAU, PreserveLCSSA);
- return NewBBs[0];
- }
-
- // Create new basic block, insert right before the original block.
- BasicBlock *NewBB = BasicBlock::Create(
- BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB);
-
- // The new block unconditionally branches to the old block.
- BranchInst *BI = BranchInst::Create(BB, NewBB);
- // Splitting the predecessors of a loop header creates a preheader block.
- if (LI && LI->isLoopHeader(BB))
- // Using the loop start line number prevents debuggers stepping into the
- // loop body for this instruction.
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
- else
- BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
-
- // Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- // This is slightly more strict than necessary; the minimum requirement
- // is that there be no more than one indirectbr branching to BB. And
- // all BlockAddress uses would need to be updated.
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from a CallBrInst");
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
- }
-
- // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
- // node becomes an incoming value for BB's phi node. However, if the Preds
- // list is empty, we need to insert dummy entries into the PHI nodes in BB to
- // account for the newly created predecessor.
- if (Preds.empty()) {
- // Insert dummy values as the incoming value.
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
- cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
- }
-
- // Update DominatorTree, LoopInfo, and LCCSA analysis information.
- bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
- HasLoopExit);
-
- if (!Preds.empty()) {
- // Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit);
- }
-
- return NewBB;
-}
-
-void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix1, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
-
- // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
- // it right before the original block.
- BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
- OrigBB->getName() + Suffix1,
- OrigBB->getParent(), OrigBB);
- NewBBs.push_back(NewBB1);
-
- // The new block unconditionally branches to the old block.
- BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
- BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
-
- // Move the edges from Preds to point to NewBB1 instead of OrigBB.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- // This is slightly more strict than necessary; the minimum requirement
- // is that there be no more than one indirectbr branching to BB. And
- // all BlockAddress uses would need to be updated.
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
- Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
- }
-
- bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
- HasLoopExit);
-
- // Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit);
-
- // Move the remaining edges from OrigBB to point to NewBB2.
- SmallVector<BasicBlock*, 8> NewBB2Preds;
- for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
- i != e; ) {
- BasicBlock *Pred = *i++;
- if (Pred == NewBB1) continue;
- assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
- NewBB2Preds.push_back(Pred);
- e = pred_end(OrigBB);
- }
-
- BasicBlock *NewBB2 = nullptr;
- if (!NewBB2Preds.empty()) {
- // Create another basic block for the rest of OrigBB's predecessors.
- NewBB2 = BasicBlock::Create(OrigBB->getContext(),
- OrigBB->getName() + Suffix2,
- OrigBB->getParent(), OrigBB);
- NewBBs.push_back(NewBB2);
-
- // The new block unconditionally branches to the old block.
- BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
- BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc());
-
- // Move the remaining edges from OrigBB to point to NewBB2.
- for (BasicBlock *NewBB2Pred : NewBB2Preds)
- NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
-
- // Update DominatorTree, LoopInfo, and LCCSA analysis information.
- HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
- PreserveLCSSA, HasLoopExit);
-
- // Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit);
- }
-
- LandingPadInst *LPad = OrigBB->getLandingPadInst();
- Instruction *Clone1 = LPad->clone();
- Clone1->setName(Twine("lpad") + Suffix1);
- NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
-
- if (NewBB2) {
- Instruction *Clone2 = LPad->clone();
- Clone2->setName(Twine("lpad") + Suffix2);
- NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
-
- // Create a PHI node for the two cloned landingpad instructions only
- // if the original landingpad instruction has some uses.
- if (!LPad->use_empty()) {
- assert(!LPad->getType()->isTokenTy() &&
- "Split cannot be applied if LPad is token type. Otherwise an "
- "invalid PHINode of token type would be created.");
- PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
- PN->addIncoming(Clone1, NewBB1);
- PN->addIncoming(Clone2, NewBB2);
- LPad->replaceAllUsesWith(PN);
- }
- LPad->eraseFromParent();
- } else {
- // There is no second clone. Just replace the landing pad with the first
- // clone.
- LPad->replaceAllUsesWith(Clone1);
- LPad->eraseFromParent();
- }
-}
-
-ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred,
- DomTreeUpdater *DTU) {
- Instruction *UncondBranch = Pred->getTerminator();
- // Clone the return and add it to the end of the predecessor.
- Instruction *NewRet = RI->clone();
- Pred->getInstList().push_back(NewRet);
-
- // If the return instruction returns a value, and if the value was a
- // PHI node in "BB", propagate the right value into the return.
- for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i) {
- Value *V = *i;
- Instruction *NewBC = nullptr;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
- // Return value might be bitcasted. Clone and insert it before the
- // return instruction.
- V = BCI->getOperand(0);
- NewBC = BCI->clone();
- Pred->getInstList().insert(NewRet->getIterator(), NewBC);
- *i = NewBC;
- }
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
- if (PN->getParent() == BB) {
- if (NewBC)
- NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
- else
- *i = PN->getIncomingValueForBlock(Pred);
- }
- }
- }
-
- // Update any PHI nodes in the returning block to realize that we no
- // longer branch to them.
- BB->removePredecessor(Pred);
- UncondBranch->eraseFromParent();
-
- if (DTU)
- DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
-
- return cast<ReturnInst>(NewRet);
-}
-
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
- Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI,
- BasicBlock *ThenBlock) {
- BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- Instruction *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getContext();
- Instruction *CheckTerm;
- bool CreateThenBlock = (ThenBlock == nullptr);
- if (CreateThenBlock) {
- ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
- } else
- CheckTerm = ThenBlock->getTerminator();
- BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
- HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
- ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
- if (DT) {
- if (DomTreeNode *OldNode = DT->getNode(Head)) {
- std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
-
- DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
- for (DomTreeNode *Child : Children)
- DT->changeImmediateDominator(Child, NewNode);
-
- // Head dominates ThenBlock.
- if (CreateThenBlock)
- DT->addNewBlock(ThenBlock, Head);
- else
- DT->changeImmediateDominator(ThenBlock, Head);
- }
- }
-
- if (LI) {
- if (Loop *L = LI->getLoopFor(Head)) {
- L->addBasicBlockToLoop(ThenBlock, *LI);
- L->addBasicBlockToLoop(Tail, *LI);
- }
- }
-
- return CheckTerm;
-}
-
-void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- Instruction **ThenTerm,
- Instruction **ElseTerm,
- MDNode *BranchWeights) {
- BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- Instruction *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- *ThenTerm = BranchInst::Create(Tail, ThenBlock);
- (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
- *ElseTerm = BranchInst::Create(Tail, ElseBlock);
- (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
- BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
- HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
- ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-}
-
-Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
- BasicBlock *&IfFalse) {
- PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
- BasicBlock *Pred1 = nullptr;
- BasicBlock *Pred2 = nullptr;
-
- if (SomePHI) {
- if (SomePHI->getNumIncomingValues() != 2)
- return nullptr;
- Pred1 = SomePHI->getIncomingBlock(0);
- Pred2 = SomePHI->getIncomingBlock(1);
- } else {
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (PI == PE) // No predecessor
- return nullptr;
- Pred1 = *PI++;
- if (PI == PE) // Only one predecessor
- return nullptr;
- Pred2 = *PI++;
- if (PI != PE) // More than two predecessors
- return nullptr;
- }
-
- // We can only handle branches. Other control flow will be lowered to
- // branches if possible anyway.
- BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
- BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
- if (!Pred1Br || !Pred2Br)
- return nullptr;
-
- // Eliminate code duplication by ensuring that Pred1Br is conditional if
- // either are.
- if (Pred2Br->isConditional()) {
- // If both branches are conditional, we don't have an "if statement". In
- // reality, we could transform this case, but since the condition will be
- // required anyway, we stand no chance of eliminating it, so the xform is
- // probably not profitable.
- if (Pred1Br->isConditional())
- return nullptr;
-
- std::swap(Pred1, Pred2);
- std::swap(Pred1Br, Pred2Br);
- }
-
- if (Pred1Br->isConditional()) {
- // The only thing we have to watch out for here is to make sure that Pred2
- // doesn't have incoming edges from other blocks. If it does, the condition
- // doesn't dominate BB.
- if (!Pred2->getSinglePredecessor())
- return nullptr;
-
- // If we found a conditional branch predecessor, make sure that it branches
- // to BB and Pred2Br. If it doesn't, this isn't an "if statement".
- if (Pred1Br->getSuccessor(0) == BB &&
- Pred1Br->getSuccessor(1) == Pred2) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else if (Pred1Br->getSuccessor(0) == Pred2 &&
- Pred1Br->getSuccessor(1) == BB) {
- IfTrue = Pred2;
- IfFalse = Pred1;
- } else {
- // We know that one arm of the conditional goes to BB, so the other must
- // go somewhere unrelated, and this must not be an "if statement".
- return nullptr;
- }
-
- return Pred1Br->getCondition();
- }
-
- // Ok, if we got here, both predecessors end with an unconditional branch to
- // BB. Don't panic! If both blocks only have a single (identical)
- // predecessor, and THAT is a conditional branch, then we're all ok!
- BasicBlock *CommonPred = Pred1->getSinglePredecessor();
- if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor())
- return nullptr;
-
- // Otherwise, if this is a conditional branch, then we can use it!
- BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
- if (!BI) return nullptr;
-
- assert(BI->isConditional() && "Two successors but not conditional?");
- if (BI->getSuccessor(0) == Pred1) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else {
- IfTrue = Pred2;
- IfFalse = Pred1;
- }
- return BI->getCondition();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
deleted file mode 100644
index f5e4b53f6d97..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ /dev/null
@@ -1,473 +0,0 @@
-//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
-// inserting a dummy basic block. This pass may be "required" by passes that
-// cannot deal with critical edges. For this usage, the structure type is
-// forward declared. This pass obviously invalidates the CFG, but can update
-// dominator trees.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "break-crit-edges"
-
-STATISTIC(NumBroken, "Number of blocks inserted");
-
-namespace {
- struct BreakCriticalEdges : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(ID) {
- initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
- auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
- auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
-
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- unsigned N =
- SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
- NumBroken += N;
- return N > 0;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
-
- // No loop canonicalization guarantees are broken by this pass.
- AU.addPreservedID(LoopSimplifyID);
- }
- };
-}
-
-char BreakCriticalEdges::ID = 0;
-INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
- "Break critical edges in CFG", false, false)
-
-// Publicly exposed interface to pass...
-char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
-FunctionPass *llvm::createBreakCriticalEdgesPass() {
- return new BreakCriticalEdges();
-}
-
-PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
- auto *LI = AM.getCachedResult<LoopAnalysis>(F);
- unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
- NumBroken += N;
- if (N == 0)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
- return PA;
-}
-
-//===----------------------------------------------------------------------===//
-// Implementation of the external critical edge manipulation functions
-//===----------------------------------------------------------------------===//
-
-/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
-/// exit block. This function inserts the new PHIs, as needed. Preds is a list
-/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
-/// the old loop exit, now the successor of SplitBB.
-static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
- BasicBlock *SplitBB,
- BasicBlock *DestBB) {
- // SplitBB shouldn't have anything non-trivial in it yet.
- assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
- SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
-
- // For each PHI in the destination block.
- for (PHINode &PN : DestBB->phis()) {
- unsigned Idx = PN.getBasicBlockIndex(SplitBB);
- Value *V = PN.getIncomingValue(Idx);
-
- // If the input is a PHI which already satisfies LCSSA, don't create
- // a new one.
- if (const PHINode *VP = dyn_cast<PHINode>(V))
- if (VP->getParent() == SplitBB)
- continue;
-
- // Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN = PHINode::Create(
- PN.getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
- for (unsigned i = 0, e = Preds.size(); i != e; ++i)
- NewPN->addIncoming(V, Preds[i]);
-
- // Update the original PHI.
- PN.setIncomingValue(Idx, NewPN);
- }
-}
-
-BasicBlock *
-llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
- const CriticalEdgeSplittingOptions &Options) {
- if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
- return nullptr;
-
- assert(!isa<IndirectBrInst>(TI) &&
- "Cannot split critical edge from IndirectBrInst");
-
- BasicBlock *TIBB = TI->getParent();
- BasicBlock *DestBB = TI->getSuccessor(SuccNum);
-
- // Splitting the critical edge to a pad block is non-trivial. Don't do
- // it in this generic function.
- if (DestBB->isEHPad()) return nullptr;
-
- // Don't split the non-fallthrough edge from a callbr.
- if (isa<CallBrInst>(TI) && SuccNum > 0)
- return nullptr;
-
- if (Options.IgnoreUnreachableDests &&
- isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
- return nullptr;
-
- // Create a new basic block, linking it into the CFG.
- BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
- TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
- // Create our unconditional branch.
- BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
- NewBI->setDebugLoc(TI->getDebugLoc());
-
- // Branch to the new block, breaking the edge.
- TI->setSuccessor(SuccNum, NewBB);
-
- // Insert the block into the function... right after the block TI lives in.
- Function &F = *TIBB->getParent();
- Function::iterator FBBI = TIBB->getIterator();
- F.getBasicBlockList().insert(++FBBI, NewBB);
-
- // If there are any PHI nodes in DestBB, we need to update them so that they
- // merge incoming values from NewBB instead of from TIBB.
- {
- unsigned BBIdx = 0;
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- // We no longer enter through TIBB, now we come in through NewBB.
- // Revector exactly one entry in the PHI node that used to come from
- // TIBB to come from NewBB.
- PHINode *PN = cast<PHINode>(I);
-
- // Reuse the previous value of BBIdx if it lines up. In cases where we
- // have multiple phi nodes with *lots* of predecessors, this is a speed
- // win because we don't have to scan the PHI looking for TIBB. This
- // happens because the BB list of PHI nodes are usually in the same
- // order.
- if (PN->getIncomingBlock(BBIdx) != TIBB)
- BBIdx = PN->getBasicBlockIndex(TIBB);
- PN->setIncomingBlock(BBIdx, NewBB);
- }
- }
-
- // If there are any other edges from TIBB to DestBB, update those to go
- // through the split block, making those edges non-critical as well (and
- // reducing the number of phi entries in the DestBB if relevant).
- if (Options.MergeIdenticalEdges) {
- for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
- if (TI->getSuccessor(i) != DestBB) continue;
-
- // Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
-
- // We found another edge to DestBB, go to NewBB instead.
- TI->setSuccessor(i, NewBB);
- }
- }
-
- // If we have nothing to update, just return.
- auto *DT = Options.DT;
- auto *PDT = Options.PDT;
- auto *LI = Options.LI;
- auto *MSSAU = Options.MSSAU;
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
- DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
-
- if (!DT && !PDT && !LI)
- return NewBB;
-
- if (DT || PDT) {
- // Update the DominatorTree.
- // ---> NewBB -----\
- // / V
- // TIBB -------\\------> DestBB
- //
- // First, inform the DT about the new path from TIBB to DestBB via NewBB,
- // then delete the old edge from TIBB to DestBB. By doing this in that order
- // DestBB stays reachable in the DT the whole time and its subtree doesn't
- // get disconnected.
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
- Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
- if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
- Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
-
- if (DT)
- DT->applyUpdates(Updates);
- if (PDT)
- PDT->applyUpdates(Updates);
- }
-
- // Update LoopInfo if it is around.
- if (LI) {
- if (Loop *TIL = LI->getLoopFor(TIBB)) {
- // If one or the other blocks were not in a loop, the new block is not
- // either, and thus LI doesn't need to be updated.
- if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
- if (TIL == DestLoop) {
- // Both in the same loop, the NewBB joins loop.
- DestLoop->addBasicBlockToLoop(NewBB, *LI);
- } else if (TIL->contains(DestLoop)) {
- // Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NewBB, *LI);
- } else if (DestLoop->contains(TIL)) {
- // Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NewBB, *LI);
- } else {
- // Edge from two loops with no containment relation. Because these
- // are natural loops, we know that the destination block must be the
- // header of its loop (adding a branch into a loop elsewhere would
- // create an irreducible loop).
- assert(DestLoop->getHeader() == DestBB &&
- "Should not create irreducible loops!");
- if (Loop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NewBB, *LI);
- }
- }
-
- // If TIBB is in a loop and DestBB is outside of that loop, we may need
- // to update LoopSimplify form and LCSSA form.
- if (!TIL->contains(DestBB)) {
- assert(!TIL->contains(NewBB) &&
- "Split point for loop exit is contained in loop!");
-
- // Update LCSSA form in the newly created exit block.
- if (Options.PreserveLCSSA) {
- createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
- }
-
- // The only that we can break LoopSimplify form by splitting a critical
- // edge is if after the split there exists some edge from TIL to DestBB
- // *and* the only edge into DestBB from outside of TIL is that of
- // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
- // is the new exit block and it has no non-loop predecessors. If the
- // second isn't true, then DestBB was not in LoopSimplify form prior to
- // the split as it had a non-loop predecessor. In both of these cases,
- // the predecessor must be directly in TIL, not in a subloop, or again
- // LoopSimplify doesn't hold.
- SmallVector<BasicBlock *, 4> LoopPreds;
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
- ++I) {
- BasicBlock *P = *I;
- if (P == NewBB)
- continue; // The new block is known.
- if (LI->getLoopFor(P) != TIL) {
- // No need to re-simplify, it wasn't to start with.
- LoopPreds.clear();
- break;
- }
- LoopPreds.push_back(P);
- }
- if (!LoopPreds.empty()) {
- assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
- BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
- if (Options.PreserveLCSSA)
- createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
- }
- }
- }
- }
-
- return NewBB;
-}
-
-// Return the unique indirectbr predecessor of a block. This may return null
-// even if such a predecessor exists, if it's not useful for splitting.
-// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
-// predecessors of BB.
-static BasicBlock *
-findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
- // If the block doesn't have any PHIs, we don't care about it, since there's
- // no point in splitting it.
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
- if (!PN)
- return nullptr;
-
- // Verify we have exactly one IBR predecessor.
- // Conservatively bail out if one of the other predecessors is not a "regular"
- // terminator (that is, not a switch or a br).
- BasicBlock *IBB = nullptr;
- for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
- BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- Instruction *PredTerm = PredBB->getTerminator();
- switch (PredTerm->getOpcode()) {
- case Instruction::IndirectBr:
- if (IBB)
- return nullptr;
- IBB = PredBB;
- break;
- case Instruction::Br:
- case Instruction::Switch:
- OtherPreds.push_back(PredBB);
- continue;
- default:
- return nullptr;
- }
- }
-
- return IBB;
-}
-
-bool llvm::SplitIndirectBrCriticalEdges(Function &F,
- BranchProbabilityInfo *BPI,
- BlockFrequencyInfo *BFI) {
- // Check whether the function has any indirectbrs, and collect which blocks
- // they may jump to. Since most functions don't have indirect branches,
- // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
- SmallSetVector<BasicBlock *, 16> Targets;
- for (auto &BB : F) {
- auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
- if (!IBI)
- continue;
-
- for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
- Targets.insert(IBI->getSuccessor(Succ));
- }
-
- if (Targets.empty())
- return false;
-
- bool ShouldUpdateAnalysis = BPI && BFI;
- bool Changed = false;
- for (BasicBlock *Target : Targets) {
- SmallVector<BasicBlock *, 16> OtherPreds;
- BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
- // If we did not found an indirectbr, or the indirectbr is the only
- // incoming edge, this isn't the kind of edge we're looking for.
- if (!IBRPred || OtherPreds.empty())
- continue;
-
- // Don't even think about ehpads/landingpads.
- Instruction *FirstNonPHI = Target->getFirstNonPHI();
- if (FirstNonPHI->isEHPad() || Target->isLandingPad())
- continue;
-
- BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
- if (ShouldUpdateAnalysis) {
- // Copy the BFI/BPI from Target to BodyBlock.
- for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
- I < E; ++I)
- BPI->setEdgeProbability(BodyBlock, I,
- BPI->getEdgeProbability(Target, I));
- BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
- }
- // It's possible Target was its own successor through an indirectbr.
- // In this case, the indirectbr now comes from BodyBlock.
- if (IBRPred == Target)
- IBRPred = BodyBlock;
-
- // At this point Target only has PHIs, and BodyBlock has the rest of the
- // block's body. Create a copy of Target that will be used by the "direct"
- // preds.
- ValueToValueMapTy VMap;
- BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
-
- BlockFrequency BlockFreqForDirectSucc;
- for (BasicBlock *Pred : OtherPreds) {
- // If the target is a loop to itself, then the terminator of the split
- // block (BodyBlock) needs to be updated.
- BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
- Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
- if (ShouldUpdateAnalysis)
- BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
- BPI->getEdgeProbability(Src, DirectSucc);
- }
- if (ShouldUpdateAnalysis) {
- BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
- BlockFrequency NewBlockFreqForTarget =
- BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
- BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
- BPI->eraseBlock(Target);
- }
-
- // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
- // they are clones, so the number of PHIs are the same.
- // (a) Remove the edge coming from IBRPred from the "Direct" PHI
- // (b) Leave that as the only edge in the "Indirect" PHI.
- // (c) Merge the two in the body block.
- BasicBlock::iterator Indirect = Target->begin(),
- End = Target->getFirstNonPHI()->getIterator();
- BasicBlock::iterator Direct = DirectSucc->begin();
- BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
-
- assert(&*End == Target->getTerminator() &&
- "Block was expected to only contain PHIs");
-
- while (Indirect != End) {
- PHINode *DirPHI = cast<PHINode>(Direct);
- PHINode *IndPHI = cast<PHINode>(Indirect);
-
- // Now, clean up - the direct block shouldn't get the indirect value,
- // and vice versa.
- DirPHI->removeIncomingValue(IBRPred);
- Direct++;
-
- // Advance the pointer here, to avoid invalidation issues when the old
- // PHI is erased.
- Indirect++;
-
- PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
- NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
- IBRPred);
-
- // Create a PHI in the body block, to merge the direct and indirect
- // predecessors.
- PHINode *MergePHI =
- PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
- MergePHI->addIncoming(NewIndPHI, Target);
- MergePHI->addIncoming(DirPHI, DirectSucc);
-
- IndPHI->replaceAllUsesWith(MergePHI);
- IndPHI->eraseFromParent();
- }
-
- Changed = true;
- }
-
- return Changed;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
deleted file mode 100644
index 27f110e24f9c..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ /dev/null
@@ -1,1340 +0,0 @@
-//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements some functions that will create standard C libcalls.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "build-libcalls"
-
-//- Infer Attributes ---------------------------------------------------------//
-
-STATISTIC(NumReadNone, "Number of functions inferred as readnone");
-STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
-STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
-STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
-STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
-STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
-STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
-STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
-STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
-
-static bool setDoesNotAccessMemory(Function &F) {
- if (F.doesNotAccessMemory())
- return false;
- F.setDoesNotAccessMemory();
- ++NumReadNone;
- return true;
-}
-
-static bool setOnlyReadsMemory(Function &F) {
- if (F.onlyReadsMemory())
- return false;
- F.setOnlyReadsMemory();
- ++NumReadOnly;
- return true;
-}
-
-static bool setOnlyAccessesArgMemory(Function &F) {
- if (F.onlyAccessesArgMemory())
- return false;
- F.setOnlyAccessesArgMemory();
- ++NumArgMemOnly;
- return true;
-}
-
-static bool setDoesNotThrow(Function &F) {
- if (F.doesNotThrow())
- return false;
- F.setDoesNotThrow();
- ++NumNoUnwind;
- return true;
-}
-
-static bool setRetDoesNotAlias(Function &F) {
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias))
- return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- ++NumNoAlias;
- return true;
-}
-
-static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
- if (F.hasParamAttribute(ArgNo, Attribute::NoCapture))
- return false;
- F.addParamAttr(ArgNo, Attribute::NoCapture);
- ++NumNoCapture;
- return true;
-}
-
-static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
- if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
- return false;
- F.addParamAttr(ArgNo, Attribute::ReadOnly);
- ++NumReadOnlyArg;
- return true;
-}
-
-static bool setRetNonNull(Function &F) {
- assert(F.getReturnType()->isPointerTy() &&
- "nonnull applies only to pointers");
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
- return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- ++NumNonNull;
- return true;
-}
-
-static bool setReturnedArg(Function &F, unsigned ArgNo) {
- if (F.hasParamAttribute(ArgNo, Attribute::Returned))
- return false;
- F.addParamAttr(ArgNo, Attribute::Returned);
- ++NumReturnedArg;
- return true;
-}
-
-static bool setNonLazyBind(Function &F) {
- if (F.hasFnAttribute(Attribute::NonLazyBind))
- return false;
- F.addFnAttr(Attribute::NonLazyBind);
- return true;
-}
-
-static bool setDoesNotFreeMemory(Function &F) {
- if (F.hasFnAttribute(Attribute::NoFree))
- return false;
- F.addFnAttr(Attribute::NoFree);
- return true;
-}
-
-bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
- const TargetLibraryInfo &TLI) {
- Function *F = M->getFunction(Name);
- if (!F)
- return false;
- return inferLibFuncAttributes(*F, TLI);
-}
-
-bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
- LibFunc TheLibFunc;
- if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
- return false;
-
- bool Changed = false;
-
- if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
- Changed |= setDoesNotFreeMemory(F);
-
- if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
- Changed |= setNonLazyBind(F);
-
- switch (TheLibFunc) {
- case LibFunc_strlen:
- case LibFunc_wcslen:
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyAccessesArgMemory(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_strchr:
- case LibFunc_strrchr:
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc_strtol:
- case LibFunc_strtod:
- case LibFunc_strtof:
- case LibFunc_strtoul:
- case LibFunc_strtoll:
- case LibFunc_strtold:
- case LibFunc_strtoull:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_strcpy:
- case LibFunc_strncpy:
- case LibFunc_strcat:
- case LibFunc_strncat:
- Changed |= setReturnedArg(F, 0);
- LLVM_FALLTHROUGH;
- case LibFunc_stpcpy:
- case LibFunc_stpncpy:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_strxfrm:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_strcmp: // 0,1
- case LibFunc_strspn: // 0,1
- case LibFunc_strncmp: // 0,1
- case LibFunc_strcspn: // 0,1
- case LibFunc_strcoll: // 0,1
- case LibFunc_strcasecmp: // 0,1
- case LibFunc_strncasecmp: //
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_strstr:
- case LibFunc_strpbrk:
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_strtok:
- case LibFunc_strtok_r:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_scanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_setbuf:
- case LibFunc_setvbuf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_strdup:
- case LibFunc_strndup:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_stat:
- case LibFunc_statvfs:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_sscanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_sprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_snprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc_setitimer:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_system:
- // May throw; "system" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_malloc:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_memcmp:
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_memchr:
- case LibFunc_memrchr:
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc_modf:
- case LibFunc_modff:
- case LibFunc_modfl:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_memcpy:
- case LibFunc_memmove:
- Changed |= setReturnedArg(F, 0);
- LLVM_FALLTHROUGH;
- case LibFunc_mempcpy:
- case LibFunc_memccpy:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_memcpy_chk:
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc_memalign:
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_mkdir:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_mktime:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_realloc:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_read:
- // May throw; "read" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_rewind:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_rmdir:
- case LibFunc_remove:
- case LibFunc_realpath:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_rename:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_readlink:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_write:
- // May throw; "write" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_bcopy:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_bcmp:
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_bzero:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_calloc:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_chmod:
- case LibFunc_chown:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_ctermid:
- case LibFunc_clearerr:
- case LibFunc_closedir:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_atoi:
- case LibFunc_atol:
- case LibFunc_atof:
- case LibFunc_atoll:
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_access:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_fopen:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_fdopen:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_feof:
- case LibFunc_free:
- case LibFunc_fseek:
- case LibFunc_ftell:
- case LibFunc_fgetc:
- case LibFunc_fgetc_unlocked:
- case LibFunc_fseeko:
- case LibFunc_ftello:
- case LibFunc_fileno:
- case LibFunc_fflush:
- case LibFunc_fclose:
- case LibFunc_fsetpos:
- case LibFunc_flockfile:
- case LibFunc_funlockfile:
- case LibFunc_ftrylockfile:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_ferror:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F);
- return Changed;
- case LibFunc_fputc:
- case LibFunc_fputc_unlocked:
- case LibFunc_fstat:
- case LibFunc_frexp:
- case LibFunc_frexpf:
- case LibFunc_frexpl:
- case LibFunc_fstatvfs:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_fgets:
- case LibFunc_fgets_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc_fread:
- case LibFunc_fread_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 3);
- return Changed;
- case LibFunc_fwrite:
- case LibFunc_fwrite_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 3);
- // FIXME: readonly #1?
- return Changed;
- case LibFunc_fputs:
- case LibFunc_fputs_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_fscanf:
- case LibFunc_fprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_fgetpos:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_getc:
- case LibFunc_getlogin_r:
- case LibFunc_getc_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_getenv:
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_gets:
- case LibFunc_getchar:
- case LibFunc_getchar_unlocked:
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc_getitimer:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_getpwnam:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_ungetc:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_uname:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_unlink:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_unsetenv:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_utime:
- case LibFunc_utimes:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_putc:
- case LibFunc_putc_unlocked:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_puts:
- case LibFunc_printf:
- case LibFunc_perror:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_pread:
- // May throw; "pread" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_pwrite:
- // May throw; "pwrite" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_putchar:
- case LibFunc_putchar_unlocked:
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc_popen:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_pclose:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_vscanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_vsscanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_vfscanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_valloc:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_vprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_vfprintf:
- case LibFunc_vsprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_vsnprintf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc_open:
- // May throw; "open" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_opendir:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_tmpfile:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_times:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_htonl:
- case LibFunc_htons:
- case LibFunc_ntohl:
- case LibFunc_ntohs:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAccessMemory(F);
- return Changed;
- case LibFunc_lstat:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_lchown:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_qsort:
- // May throw; places call through function pointer.
- Changed |= setDoesNotCapture(F, 3);
- return Changed;
- case LibFunc_dunder_strdup:
- case LibFunc_dunder_strndup:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_dunder_strtok_r:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_under_IO_getc:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_under_IO_putc:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_dunder_isoc99_scanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_stat64:
- case LibFunc_lstat64:
- case LibFunc_statvfs64:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_dunder_isoc99_sscanf:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_fopen64:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 0);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc_fseeko64:
- case LibFunc_ftello64:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_tmpfile64:
- Changed |= setDoesNotThrow(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- case LibFunc_fstat64:
- case LibFunc_fstatvfs64:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_open64:
- // May throw; "open" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
- case LibFunc_gettimeofday:
- // Currently some platforms have the restrict keyword on the arguments to
- // gettimeofday. To be conservative, do not add noalias to gettimeofday's
- // arguments.
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc_Znwj: // new(unsigned int)
- case LibFunc_Znwm: // new(unsigned long)
- case LibFunc_Znaj: // new[](unsigned int)
- case LibFunc_Znam: // new[](unsigned long)
- case LibFunc_msvc_new_int: // new(unsigned int)
- case LibFunc_msvc_new_longlong: // new(unsigned long long)
- case LibFunc_msvc_new_array_int: // new[](unsigned int)
- case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
- // Operator new always returns a nonnull noalias pointer
- Changed |= setRetNonNull(F);
- Changed |= setRetDoesNotAlias(F);
- return Changed;
- // TODO: add LibFunc entries for:
- // case LibFunc_memset_pattern4:
- // case LibFunc_memset_pattern8:
- case LibFunc_memset_pattern16:
- Changed |= setOnlyAccessesArgMemory(F);
- Changed |= setDoesNotCapture(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- // int __nvvm_reflect(const char *)
- case LibFunc_nvvm_reflect:
- Changed |= setDoesNotAccessMemory(F);
- Changed |= setDoesNotThrow(F);
- return Changed;
-
- default:
- // FIXME: It'd be really nice to cover all the library functions we're
- // aware of here.
- return false;
- }
-}
-
-bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- return false;
- case Type::FloatTyID:
- return TLI->has(FloatFn);
- case Type::DoubleTyID:
- return TLI->has(DoubleFn);
- default:
- return TLI->has(LongDoubleFn);
- }
-}
-
-StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
- "Cannot get name for unavailable function!");
-
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- llvm_unreachable("No name for HalfTy!");
- case Type::FloatTyID:
- return TLI->getName(FloatFn);
- case Type::DoubleTyID:
- return TLI->getName(DoubleFn);
- default:
- return TLI->getName(LongDoubleFn);
- }
-}
-
-//- Emit LibCalls ------------------------------------------------------------//
-
-Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
- unsigned AS = V->getType()->getPointerAddressSpace();
- return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
-}
-
-static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
- ArrayRef<Type *> ParamTypes,
- ArrayRef<Value *> Operands, IRBuilder<> &B,
- const TargetLibraryInfo *TLI,
- bool IsVaArgs = false) {
- if (!TLI->has(TheLibFunc))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FuncName = TLI->getName(TheLibFunc);
- FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
- FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
- inferLibFuncAttributes(M, FuncName, *TLI);
- CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
- if (const Function *F =
- dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
- B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
-}
-
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- Type *I32Ty = B.getInt32Ty();
- return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
- {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
-}
-
-Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- return emitLibCall(
- LibFunc_strncmp, B.getInt32Ty(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
-}
-
-Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
-}
-
-Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
- {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
-}
-
-Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
-}
-
-Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
- return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
- {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
-}
-
-Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memcpy_chk))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- AttributeList AS;
- AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- FunctionCallee MemCpy = M->getOrInsertFunction(
- "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
- DL.getIntPtrType(Context));
- Dst = castToCStr(Dst, B);
- Src = castToCStr(Src, B);
- CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
- if (const Function *F =
- dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- return emitLibCall(
- LibFunc_memchr, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
- {castToCStr(Ptr, B), Val, Len}, B, TLI);
-}
-
-Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- return emitLibCall(
- LibFunc_memcmp, B.getInt32Ty(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
-}
-
-Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- return emitLibCall(
- LibFunc_bcmp, B.getInt32Ty(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
- {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
-}
-
-Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
- return emitLibCall(
- LibFunc_memccpy, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
- {Ptr1, Ptr2, Val, Len}, B, TLI);
-}
-
-Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
- ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
- return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
- {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
- Args, B, TLI, /*IsVaArgs=*/true);
-}
-
-Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
- ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
- Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
- return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
- {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
- /*IsVaArgs=*/true);
-}
-
-Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt8PtrTy()},
- {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
-}
-
-Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strlcpy, Size->getType(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
-}
-
-Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strlcat, Size->getType(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
-}
-
-Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
- {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
-}
-
-Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
- return emitLibCall(
- LibFunc_vsnprintf, B.getInt32Ty(),
- {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
- {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
-}
-
-Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
- return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
- {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
- {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
-}
-
-/// Append a suffix to the function name according to the type of 'Op'.
-static void appendTypeSuffix(Value *Op, StringRef &Name,
- SmallString<20> &NameBuffer) {
- if (!Op->getType()->isDoubleTy()) {
- NameBuffer += Name;
-
- if (Op->getType()->isFloatTy())
- NameBuffer += 'f';
- else
- NameBuffer += 'l';
-
- Name = NameBuffer;
- }
-}
-
-static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
- IRBuilder<> &B,
- const AttributeList &Attrs) {
- assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
-
- Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee =
- M->getOrInsertFunction(Name, Op->getType(), Op->getType());
- CallInst *CI = B.CreateCall(Callee, Op, Name);
-
- // The incoming attribute set may have come from a speculatable intrinsic, but
- // is being replaced with a library call which is not allowed to be
- // speculatable.
- CI->setAttributes(Attrs.removeAttribute(B.getContext(),
- AttributeList::FunctionIndex,
- Attribute::Speculatable));
- if (const Function *F =
- dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeList &Attrs) {
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op, Name, NameBuffer);
-
- return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
-}
-
-Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn, IRBuilder<> &B,
- const AttributeList &Attrs) {
- // Get the name of the function according to TLI.
- StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
- DoubleFn, FloatFn, LongDoubleFn);
-
- return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
-}
-
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeList &Attrs) {
- assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
-
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op1, Name, NameBuffer);
-
- Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee = M->getOrInsertFunction(
- Name, Op1->getType(), Op1->getType(), Op2->getType());
- CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
- CI->setAttributes(Attrs);
- if (const Function *F =
- dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_putchar))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef PutCharName = TLI->getName(LibFunc_putchar);
- FunctionCallee PutChar =
- M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
- inferLibFuncAttributes(M, PutCharName, *TLI);
- CallInst *CI = B.CreateCall(PutChar,
- B.CreateIntCast(Char,
- B.getInt32Ty(),
- /*isSigned*/true,
- "chari"),
- PutCharName);
-
- if (const Function *F =
- dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_puts))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef PutsName = TLI->getName(LibFunc_puts);
- FunctionCallee PutS =
- M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
- inferLibFuncAttributes(M, PutsName, *TLI);
- CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
- if (const Function *F =
- dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputc))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutcName = TLI->getName(LibFunc_fputc);
- FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutcName, *TLI);
- Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
- "chari");
- CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputc_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
- Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
- CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputs))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutsName = TLI->getName(LibFunc_fputs);
- FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutsName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputs_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fwrite))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FWriteName = TLI->getName(LibFunc_fwrite);
- FunctionCallee F = M->getOrInsertFunction(
- FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
-
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FWriteName, *TLI);
- CallInst *CI =
- B.CreateCall(F, {castToCStr(Ptr, B), Size,
- ConstantInt::get(DL.getIntPtrType(Context), 1), File});
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_malloc))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef MallocName = TLI->getName(LibFunc_malloc);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MallocName, *TLI);
- CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
-
- if (const Function *F =
- dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilder<> &B, const TargetLibraryInfo &TLI) {
- if (!TLI.has(LibFunc_calloc))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef CallocName = TLI.getName(LibFunc_calloc);
- const DataLayout &DL = M->getDataLayout();
- IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- FunctionCallee Calloc = M->getOrInsertFunction(
- CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
- inferLibFuncAttributes(M, CallocName, TLI);
- CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
-
- if (const auto *F =
- dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
- IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fwrite_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
- FunctionCallee F = M->getOrInsertFunction(
- FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
-
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fgetc_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
- FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
- File->getType());
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
- IRBuilder<> &B, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fgets_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
- FunctionCallee F =
- M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
- inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
- CallInst *CI =
- B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
-
-Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
- IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fread_unlocked))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
- FunctionCallee F = M->getOrInsertFunction(
- FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
-
- if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
- CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
-
- if (const Function *Fn =
- dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
- return CI;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
deleted file mode 100644
index df299f673f65..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains an optimization for div and rem on architectures that
-// execute short instructions significantly faster than longer instructions.
-// For example, on Intel Atom 32-bit divides are slow enough that during
-// runtime it is profitable to check the value of the operands, and if they are
-// positive and less than 256 use an unsigned 8-bit divide.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/KnownBits.h"
-#include <cassert>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "bypass-slow-division"
-
-namespace {
-
- struct QuotRemPair {
- Value *Quotient;
- Value *Remainder;
-
- QuotRemPair(Value *InQuotient, Value *InRemainder)
- : Quotient(InQuotient), Remainder(InRemainder) {}
- };
-
- /// A quotient and remainder, plus a BB from which they logically "originate".
- /// If you use Quotient or Remainder in a Phi node, you should use BB as its
- /// corresponding predecessor.
- struct QuotRemWithBB {
- BasicBlock *BB = nullptr;
- Value *Quotient = nullptr;
- Value *Remainder = nullptr;
- };
-
-using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
-using BypassWidthsTy = DenseMap<unsigned, unsigned>;
-using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
-
-enum ValueRange {
- /// Operand definitely fits into BypassType. No runtime checks are needed.
- VALRNG_KNOWN_SHORT,
- /// A runtime check is required, as value range is unknown.
- VALRNG_UNKNOWN,
- /// Operand is unlikely to fit into BypassType. The bypassing should be
- /// disabled.
- VALRNG_LIKELY_LONG
-};
-
-class FastDivInsertionTask {
- bool IsValidTask = false;
- Instruction *SlowDivOrRem = nullptr;
- IntegerType *BypassType = nullptr;
- BasicBlock *MainBB = nullptr;
-
- bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
- ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
- QuotRemWithBB createSlowBB(BasicBlock *Successor);
- QuotRemWithBB createFastBB(BasicBlock *Successor);
- QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
- BasicBlock *PhiBB);
- Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
- Optional<QuotRemPair> insertFastDivAndRem();
-
- bool isSignedOp() {
- return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
- SlowDivOrRem->getOpcode() == Instruction::SRem;
- }
-
- bool isDivisionOp() {
- return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
- SlowDivOrRem->getOpcode() == Instruction::UDiv;
- }
-
- Type *getSlowType() { return SlowDivOrRem->getType(); }
-
-public:
- FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
-
- Value *getReplacement(DivCacheTy &Cache);
-};
-
-} // end anonymous namespace
-
-FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
- const BypassWidthsTy &BypassWidths) {
- switch (I->getOpcode()) {
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- SlowDivOrRem = I;
- break;
- default:
- // I is not a div/rem operation.
- return;
- }
-
- // Skip division on vector types. Only optimize integer instructions.
- IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
- if (!SlowType)
- return;
-
- // Skip if this bitwidth is not bypassed.
- auto BI = BypassWidths.find(SlowType->getBitWidth());
- if (BI == BypassWidths.end())
- return;
-
- // Get type for div/rem instruction with bypass bitwidth.
- IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
- BypassType = BT;
-
- // The original basic block.
- MainBB = I->getParent();
-
- // The instruction is indeed a slow div or rem operation.
- IsValidTask = true;
-}
-
-/// Reuses previously-computed dividend or remainder from the current BB if
-/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
-/// perform the optimization and caches the resulting dividend and remainder.
-/// If no replacement can be generated, nullptr is returned.
-Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
- // First, make sure that the task is valid.
- if (!IsValidTask)
- return nullptr;
-
- // Then, look for a value in Cache.
- Value *Dividend = SlowDivOrRem->getOperand(0);
- Value *Divisor = SlowDivOrRem->getOperand(1);
- DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
- auto CacheI = Cache.find(Key);
-
- if (CacheI == Cache.end()) {
- // If previous instance does not exist, try to insert fast div.
- Optional<QuotRemPair> OptResult = insertFastDivAndRem();
- // Bail out if insertFastDivAndRem has failed.
- if (!OptResult)
- return nullptr;
- CacheI = Cache.insert({Key, *OptResult}).first;
- }
-
- QuotRemPair &Value = CacheI->second;
- return isDivisionOp() ? Value.Quotient : Value.Remainder;
-}
-
-/// Check if a value looks like a hash.
-///
-/// The routine is expected to detect values computed using the most common hash
-/// algorithms. Typically, hash computations end with one of the following
-/// instructions:
-///
-/// 1) MUL with a constant wider than BypassType
-/// 2) XOR instruction
-///
-/// And even if we are wrong and the value is not a hash, it is still quite
-/// unlikely that such values will fit into BypassType.
-///
-/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
-/// It is implemented as a depth-first search for values that look neither long
-/// nor hash-like.
-bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
-
- switch (I->getOpcode()) {
- case Instruction::Xor:
- return true;
- case Instruction::Mul: {
- // After Constant Hoisting pass, long constants may be represented as
- // bitcast instructions. As a result, some constants may look like an
- // instruction at first, and an additional check is necessary to find out if
- // an operand is actually a constant.
- Value *Op1 = I->getOperand(1);
- ConstantInt *C = dyn_cast<ConstantInt>(Op1);
- if (!C && isa<BitCastInst>(Op1))
- C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
- return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
- }
- case Instruction::PHI:
- // Stop IR traversal in case of a crazy input code. This limits recursion
- // depth.
- if (Visited.size() >= 16)
- return false;
- // Do not visit nodes that have been visited already. We return true because
- // it means that we couldn't find any value that doesn't look hash-like.
- if (Visited.find(I) != Visited.end())
- return true;
- Visited.insert(I);
- return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
- // Ignore undef values as they probably don't affect the division
- // operands.
- return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
- isa<UndefValue>(V);
- });
- default:
- return false;
- }
-}
-
-/// Check if an integer value fits into our bypass type.
-ValueRange FastDivInsertionTask::getValueRange(Value *V,
- VisitedSetTy &Visited) {
- unsigned ShortLen = BypassType->getBitWidth();
- unsigned LongLen = V->getType()->getIntegerBitWidth();
-
- assert(LongLen > ShortLen && "Value type must be wider than BypassType");
- unsigned HiBits = LongLen - ShortLen;
-
- const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
- KnownBits Known(LongLen);
-
- computeKnownBits(V, Known, DL);
-
- if (Known.countMinLeadingZeros() >= HiBits)
- return VALRNG_KNOWN_SHORT;
-
- if (Known.countMaxLeadingZeros() < HiBits)
- return VALRNG_LIKELY_LONG;
-
- // Long integer divisions are often used in hashtable implementations. It's
- // not worth bypassing such divisions because hash values are extremely
- // unlikely to have enough leading zeros. The call below tries to detect
- // values that are unlikely to fit BypassType (including hashes).
- if (isHashLikeValue(V, Visited))
- return VALRNG_LIKELY_LONG;
-
- return VALRNG_UNKNOWN;
-}
-
-/// Add new basic block for slow div and rem operations and put it before
-/// SuccessorBB.
-QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
- QuotRemWithBB DivRemPair;
- DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
- MainBB->getParent(), SuccessorBB);
- IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
-
- Value *Dividend = SlowDivOrRem->getOperand(0);
- Value *Divisor = SlowDivOrRem->getOperand(1);
-
- if (isSignedOp()) {
- DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
- DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
- } else {
- DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
- DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
- }
-
- Builder.CreateBr(SuccessorBB);
- return DivRemPair;
-}
-
-/// Add new basic block for fast div and rem operations and put it before
-/// SuccessorBB.
-QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
- QuotRemWithBB DivRemPair;
- DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
- MainBB->getParent(), SuccessorBB);
- IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
-
- Value *Dividend = SlowDivOrRem->getOperand(0);
- Value *Divisor = SlowDivOrRem->getOperand(1);
- Value *ShortDivisorV =
- Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
- Value *ShortDividendV =
- Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
-
- // udiv/urem because this optimization only handles positive numbers.
- Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
- Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
- DivRemPair.Quotient =
- Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
- DivRemPair.Remainder =
- Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
- Builder.CreateBr(SuccessorBB);
-
- return DivRemPair;
-}
-
-/// Creates Phi nodes for result of Div and Rem.
-QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
- QuotRemWithBB &RHS,
- BasicBlock *PhiBB) {
- IRBuilder<> Builder(PhiBB, PhiBB->begin());
- PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
- QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
- QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
- PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
- RemPhi->addIncoming(LHS.Remainder, LHS.BB);
- RemPhi->addIncoming(RHS.Remainder, RHS.BB);
- return QuotRemPair(QuoPhi, RemPhi);
-}
-
-/// Creates a runtime check to test whether both the divisor and dividend fit
-/// into BypassType. The check is inserted at the end of MainBB. True return
-/// value means that the operands fit. Either of the operands may be NULL if it
-/// doesn't need a runtime check.
-Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
- assert((Op1 || Op2) && "Nothing to check");
- IRBuilder<> Builder(MainBB, MainBB->end());
-
- Value *OrV;
- if (Op1 && Op2)
- OrV = Builder.CreateOr(Op1, Op2);
- else
- OrV = Op1 ? Op1 : Op2;
-
- // BitMask is inverted to check if the operands are
- // larger than the bypass type
- uint64_t BitMask = ~BypassType->getBitMask();
- Value *AndV = Builder.CreateAnd(OrV, BitMask);
-
- // Compare operand values
- Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
- return Builder.CreateICmpEQ(AndV, ZeroV);
-}
-
-/// Substitutes the div/rem instruction with code that checks the value of the
-/// operands and uses a shorter-faster div/rem instruction when possible.
-Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
- Value *Dividend = SlowDivOrRem->getOperand(0);
- Value *Divisor = SlowDivOrRem->getOperand(1);
-
- VisitedSetTy SetL;
- ValueRange DividendRange = getValueRange(Dividend, SetL);
- if (DividendRange == VALRNG_LIKELY_LONG)
- return None;
-
- VisitedSetTy SetR;
- ValueRange DivisorRange = getValueRange(Divisor, SetR);
- if (DivisorRange == VALRNG_LIKELY_LONG)
- return None;
-
- bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
- bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
-
- if (DividendShort && DivisorShort) {
- // If both operands are known to be short then just replace the long
- // division with a short one in-place. Since we're not introducing control
- // flow in this case, narrowing the division is always a win, even if the
- // divisor is a constant (and will later get replaced by a multiplication).
-
- IRBuilder<> Builder(SlowDivOrRem);
- Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
- Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
- Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
- Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
- Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
- Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
- return QuotRemPair(ExtDiv, ExtRem);
- }
-
- if (isa<ConstantInt>(Divisor)) {
- // If the divisor is not a constant, DAGCombiner will convert it to a
- // multiplication by a magic constant. It isn't clear if it is worth
- // introducing control flow to get a narrower multiply.
- return None;
- }
-
- // After Constant Hoisting pass, long constants may be represented as
- // bitcast instructions. As a result, some constants may look like an
- // instruction at first, and an additional check is necessary to find out if
- // an operand is actually a constant.
- if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
- if (BCI->getParent() == SlowDivOrRem->getParent() &&
- isa<ConstantInt>(BCI->getOperand(0)))
- return None;
-
- if (DividendShort && !isSignedOp()) {
- // If the division is unsigned and Dividend is known to be short, then
- // either
- // 1) Divisor is less or equal to Dividend, and the result can be computed
- // with a short division.
- // 2) Divisor is greater than Dividend. In this case, no division is needed
- // at all: The quotient is 0 and the remainder is equal to Dividend.
- //
- // So instead of checking at runtime whether Divisor fits into BypassType,
- // we emit a runtime check to differentiate between these two cases. This
- // lets us entirely avoid a long div.
-
- // Split the basic block before the div/rem.
- BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
- // Remove the unconditional branch from MainBB to SuccessorBB.
- MainBB->getInstList().back().eraseFromParent();
- QuotRemWithBB Long;
- Long.BB = MainBB;
- Long.Quotient = ConstantInt::get(getSlowType(), 0);
- Long.Remainder = Dividend;
- QuotRemWithBB Fast = createFastBB(SuccessorBB);
- QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
- IRBuilder<> Builder(MainBB, MainBB->end());
- Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
- Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
- return Result;
- } else {
- // General case. Create both slow and fast div/rem pairs and choose one of
- // them at runtime.
-
- // Split the basic block before the div/rem.
- BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
- // Remove the unconditional branch from MainBB to SuccessorBB.
- MainBB->getInstList().back().eraseFromParent();
- QuotRemWithBB Fast = createFastBB(SuccessorBB);
- QuotRemWithBB Slow = createSlowBB(SuccessorBB);
- QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
- Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
- DivisorShort ? nullptr : Divisor);
- IRBuilder<> Builder(MainBB, MainBB->end());
- Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
- return Result;
- }
-}
-
-/// This optimization identifies DIV/REM instructions in a BB that can be
-/// profitably bypassed and carried out with a shorter, faster divide.
-bool llvm::bypassSlowDivision(BasicBlock *BB,
- const BypassWidthsTy &BypassWidths) {
- DivCacheTy PerBBDivCache;
-
- bool MadeChange = false;
- Instruction* Next = &*BB->begin();
- while (Next != nullptr) {
- // We may add instructions immediately after I, but we want to skip over
- // them.
- Instruction* I = Next;
- Next = Next->getNextNode();
-
- FastDivInsertionTask Task(I, BypassWidths);
- if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
- I->replaceAllUsesWith(Replacement);
- I->eraseFromParent();
- MadeChange = true;
- }
- }
-
- // Above we eagerly create divs and rems, as pairs, so that we can efficiently
- // create divrem machine instructions. Now erase any unused divs / rems so we
- // don't leave extra instructions sitting around.
- for (auto &KV : PerBBDivCache)
- for (Value *V : {KV.second.Quotient, KV.second.Remainder})
- RecursivelyDeleteTriviallyDeadInstructions(V);
-
- return MadeChange;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
deleted file mode 100644
index f04d76e70c0d..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements utilities useful for promoting indirect call sites to
-// direct call sites.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CallPromotionUtils.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "call-promotion-utils"
-
-/// Fix-up phi nodes in an invoke instruction's normal destination.
-///
-/// After versioning an invoke instruction, values coming from the original
-/// block will now be coming from the "merge" block. For example, in the code
-/// below:
-///
-/// then_bb:
-/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// else_bb:
-/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// merge_bb:
-/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ]
-/// br %normal_dst
-///
-/// normal_dst:
-/// %t3 = phi i32 [ %x, %orig_bb ], ...
-///
-/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in
-/// "normal_dst" must be fixed to refer to "merge_bb":
-///
-/// normal_dst:
-/// %t3 = phi i32 [ %x, %merge_bb ], ...
-///
-static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
- BasicBlock *MergeBlock) {
- for (PHINode &Phi : Invoke->getNormalDest()->phis()) {
- int Idx = Phi.getBasicBlockIndex(OrigBlock);
- if (Idx == -1)
- continue;
- Phi.setIncomingBlock(Idx, MergeBlock);
- }
-}
-
-/// Fix-up phi nodes in an invoke instruction's unwind destination.
-///
-/// After versioning an invoke instruction, values coming from the original
-/// block will now be coming from either the "then" block or the "else" block.
-/// For example, in the code below:
-///
-/// then_bb:
-/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// else_bb:
-/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// unwind_dst:
-/// %t3 = phi i32 [ %x, %orig_bb ], ...
-///
-/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in
-/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb":
-///
-/// unwind_dst:
-/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ...
-///
-static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
- BasicBlock *ThenBlock,
- BasicBlock *ElseBlock) {
- for (PHINode &Phi : Invoke->getUnwindDest()->phis()) {
- int Idx = Phi.getBasicBlockIndex(OrigBlock);
- if (Idx == -1)
- continue;
- auto *V = Phi.getIncomingValue(Idx);
- Phi.setIncomingBlock(Idx, ThenBlock);
- Phi.addIncoming(V, ElseBlock);
- }
-}
-
-/// Create a phi node for the returned value of a call or invoke instruction.
-///
-/// After versioning a call or invoke instruction that returns a value, we have
-/// to merge the value of the original and new instructions. We do this by
-/// creating a phi node and replacing uses of the original instruction with this
-/// phi node.
-///
-/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is
-/// defined in "then_bb", we create the following phi node:
-///
-/// ; Uses of the original instruction are replaced by uses of the phi node.
-/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ],
-///
-static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst,
- BasicBlock *MergeBlock, IRBuilder<> &Builder) {
-
- if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
- return;
-
- Builder.SetInsertPoint(&MergeBlock->front());
- PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : OrigInst->users())
- UsersToUpdate.push_back(U);
- for (User *U : UsersToUpdate)
- U->replaceUsesOfWith(OrigInst, Phi);
- Phi->addIncoming(OrigInst, OrigInst->getParent());
- Phi->addIncoming(NewInst, NewInst->getParent());
-}
-
-/// Cast a call or invoke instruction to the given type.
-///
-/// When promoting a call site, the return type of the call site might not match
-/// that of the callee. If this is the case, we have to cast the returned value
-/// to the correct type. The location of the cast depends on if we have a call
-/// or invoke instruction.
-///
-/// For example, if the call instruction below requires a bitcast after
-/// promotion:
-///
-/// orig_bb:
-/// %t0 = call i32 @func()
-/// ...
-///
-/// The bitcast is placed after the call instruction:
-///
-/// orig_bb:
-/// ; Uses of the original return value are replaced by uses of the bitcast.
-/// %t0 = call i32 @func()
-/// %t1 = bitcast i32 %t0 to ...
-/// ...
-///
-/// A similar transformation is performed for invoke instructions. However,
-/// since invokes are terminating, a new block is created for the bitcast. For
-/// example, if the invoke instruction below requires a bitcast after promotion:
-///
-/// orig_bb:
-/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst
-///
-/// The edge between the original block and the invoke's normal destination is
-/// split, and the bitcast is placed there:
-///
-/// orig_bb:
-/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst
-///
-/// split_bb:
-/// ; Uses of the original return value are replaced by uses of the bitcast.
-/// %t1 = bitcast i32 %t0 to ...
-/// br label %normal_dst
-///
-static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
-
- // Save the users of the calling instruction. These uses will be changed to
- // use the bitcast after we create it.
- SmallVector<User *, 16> UsersToUpdate;
- for (User *U : CS.getInstruction()->users())
- UsersToUpdate.push_back(U);
-
- // Determine an appropriate location to create the bitcast for the return
- // value. The location depends on if we have a call or invoke instruction.
- Instruction *InsertBefore = nullptr;
- if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
- InsertBefore =
- &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front();
- else
- InsertBefore = &*std::next(CS.getInstruction()->getIterator());
-
- // Bitcast the return value to the correct type.
- auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "",
- InsertBefore);
- if (RetBitCast)
- *RetBitCast = Cast;
-
- // Replace all the original uses of the calling instruction with the bitcast.
- for (User *U : UsersToUpdate)
- U->replaceUsesOfWith(CS.getInstruction(), Cast);
-}
-
-/// Predicate and clone the given call site.
-///
-/// This function creates an if-then-else structure at the location of the call
-/// site. The "if" condition compares the call site's called value to the given
-/// callee. The original call site is moved into the "else" block, and a clone
-/// of the call site is placed in the "then" block. The cloned instruction is
-/// returned.
-///
-/// For example, the call instruction below:
-///
-/// orig_bb:
-/// %t0 = call i32 %ptr()
-/// ...
-///
-/// Is replace by the following:
-///
-/// orig_bb:
-/// %cond = icmp eq i32 ()* %ptr, @func
-/// br i1 %cond, %then_bb, %else_bb
-///
-/// then_bb:
-/// ; The clone of the original call instruction is placed in the "then"
-/// ; block. It is not yet promoted.
-/// %t1 = call i32 %ptr()
-/// br merge_bb
-///
-/// else_bb:
-/// ; The original call instruction is moved to the "else" block.
-/// %t0 = call i32 %ptr()
-/// br merge_bb
-///
-/// merge_bb:
-/// ; Uses of the original call instruction are replaced by uses of the phi
-/// ; node.
-/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
-/// ...
-///
-/// A similar transformation is performed for invoke instructions. However,
-/// since invokes are terminating, more work is required. For example, the
-/// invoke instruction below:
-///
-/// orig_bb:
-/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst
-///
-/// Is replace by the following:
-///
-/// orig_bb:
-/// %cond = icmp eq i32 ()* %ptr, @func
-/// br i1 %cond, %then_bb, %else_bb
-///
-/// then_bb:
-/// ; The clone of the original invoke instruction is placed in the "then"
-/// ; block, and its normal destination is set to the "merge" block. It is
-/// ; not yet promoted.
-/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// else_bb:
-/// ; The original invoke instruction is moved into the "else" block, and
-/// ; its normal destination is set to the "merge" block.
-/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst
-///
-/// merge_bb:
-/// ; Uses of the original invoke instruction are replaced by uses of the
-/// ; phi node, and the merge block branches to the normal destination.
-/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
-/// br %normal_dst
-///
-static Instruction *versionCallSite(CallSite CS, Value *Callee,
- MDNode *BranchWeights) {
-
- IRBuilder<> Builder(CS.getInstruction());
- Instruction *OrigInst = CS.getInstruction();
- BasicBlock *OrigBlock = OrigInst->getParent();
-
- // Create the compare. The called value and callee must have the same type to
- // be compared.
- if (CS.getCalledValue()->getType() != Callee->getType())
- Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType());
- auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee);
-
- // Create an if-then-else structure. The original instruction is moved into
- // the "else" block, and a clone of the original instruction is placed in the
- // "then" block.
- Instruction *ThenTerm = nullptr;
- Instruction *ElseTerm = nullptr;
- SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
- BranchWeights);
- BasicBlock *ThenBlock = ThenTerm->getParent();
- BasicBlock *ElseBlock = ElseTerm->getParent();
- BasicBlock *MergeBlock = OrigInst->getParent();
-
- ThenBlock->setName("if.true.direct_targ");
- ElseBlock->setName("if.false.orig_indirect");
- MergeBlock->setName("if.end.icp");
-
- Instruction *NewInst = OrigInst->clone();
- OrigInst->moveBefore(ElseTerm);
- NewInst->insertBefore(ThenTerm);
-
- // If the original call site is an invoke instruction, we have extra work to
- // do since invoke instructions are terminating. We have to fix-up phi nodes
- // in the invoke's normal and unwind destinations.
- if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
- auto *NewInvoke = cast<InvokeInst>(NewInst);
-
- // Invoke instructions are terminating, so we don't need the terminator
- // instructions that were just created.
- ThenTerm->eraseFromParent();
- ElseTerm->eraseFromParent();
-
- // Branch from the "merge" block to the original normal destination.
- Builder.SetInsertPoint(MergeBlock);
- Builder.CreateBr(OrigInvoke->getNormalDest());
-
- // Fix-up phi nodes in the original invoke's normal and unwind destinations.
- fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
- fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
-
- // Now set the normal destinations of the invoke instructions to be the
- // "merge" block.
- OrigInvoke->setNormalDest(MergeBlock);
- NewInvoke->setNormalDest(MergeBlock);
- }
-
- // Create a phi node for the returned value of the call site.
- createRetPHINode(OrigInst, NewInst, MergeBlock, Builder);
-
- return NewInst;
-}
-
-bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
- const char **FailureReason) {
- assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
-
- auto &DL = Callee->getParent()->getDataLayout();
-
- // Check the return type. The callee's return value type must be bitcast
- // compatible with the call site's type.
- Type *CallRetTy = CS.getInstruction()->getType();
- Type *FuncRetTy = Callee->getReturnType();
- if (CallRetTy != FuncRetTy)
- if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
- if (FailureReason)
- *FailureReason = "Return type mismatch";
- return false;
- }
-
- // The number of formal arguments of the callee.
- unsigned NumParams = Callee->getFunctionType()->getNumParams();
-
- // Check the number of arguments. The callee and call site must agree on the
- // number of arguments.
- if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
- if (FailureReason)
- *FailureReason = "The number of arguments mismatch";
- return false;
- }
-
- // Check the argument types. The callee's formal argument types must be
- // bitcast compatible with the corresponding actual argument types of the call
- // site.
- for (unsigned I = 0; I < NumParams; ++I) {
- Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
- Type *ActualTy = CS.getArgument(I)->getType();
- if (FormalTy == ActualTy)
- continue;
- if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
- if (FailureReason)
- *FailureReason = "Argument type mismatch";
- return false;
- }
- }
-
- return true;
-}
-
-Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
- CastInst **RetBitCast) {
- assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
-
- // Set the called function of the call site to be the given callee (but don't
- // change the type).
- cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
-
- // Since the call site will no longer be direct, we must clear metadata that
- // is only appropriate for indirect calls. This includes !prof and !callees
- // metadata.
- CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
- CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
-
- // If the function type of the call site matches that of the callee, no
- // additional work is required.
- if (CS.getFunctionType() == Callee->getFunctionType())
- return CS.getInstruction();
-
- // Save the return types of the call site and callee.
- Type *CallSiteRetTy = CS.getInstruction()->getType();
- Type *CalleeRetTy = Callee->getReturnType();
-
- // Change the function type of the call site the match that of the callee.
- CS.mutateFunctionType(Callee->getFunctionType());
-
- // Inspect the arguments of the call site. If an argument's type doesn't
- // match the corresponding formal argument's type in the callee, bitcast it
- // to the correct type.
- auto CalleeType = Callee->getFunctionType();
- auto CalleeParamNum = CalleeType->getNumParams();
-
- LLVMContext &Ctx = Callee->getContext();
- const AttributeList &CallerPAL = CS.getAttributes();
- // The new list of argument attributes.
- SmallVector<AttributeSet, 4> NewArgAttrs;
- bool AttributeChanged = false;
-
- for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
- auto *Arg = CS.getArgument(ArgNo);
- Type *FormalTy = CalleeType->getParamType(ArgNo);
- Type *ActualTy = Arg->getType();
- if (FormalTy != ActualTy) {
- auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "",
- CS.getInstruction());
- CS.setArgument(ArgNo, Cast);
-
- // Remove any incompatible attributes for the argument.
- AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
- ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
-
- // If byval is used, this must be a pointer type, and the byval type must
- // match the element type. Update it if present.
- if (ArgAttrs.getByValType()) {
- Type *NewTy = Callee->getParamByValType(ArgNo);
- ArgAttrs.addByValAttr(
- NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
- }
-
- NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
- AttributeChanged = true;
- } else
- NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
- }
-
- // If the return type of the call site doesn't match that of the callee, cast
- // the returned value to the appropriate type.
- // Remove any incompatible return value attribute.
- AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
- if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
- createRetBitCast(CS, CallSiteRetTy, RetBitCast);
- RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
- AttributeChanged = true;
- }
-
- // Set the new callsite attribute.
- if (AttributeChanged)
- CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
- AttributeSet::get(Ctx, RAttrs),
- NewArgAttrs));
-
- return CS.getInstruction();
-}
-
-Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
- MDNode *BranchWeights) {
-
- // Version the indirect call site. If the called value is equal to the given
- // callee, 'NewInst' will be executed, otherwise the original call site will
- // be executed.
- Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights);
-
- // Promote 'NewInst' so that it directly calls the desired function.
- return promoteCall(CallSite(NewInst), Callee);
-}
-
-#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
deleted file mode 100644
index 455fcbb1cf98..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Currently this file implements partial alias canonicalization, to
-// flatten chains of aliases (also done by GlobalOpt, but not on for
-// O0 compiles). E.g.
-// @a = alias i8, i8 *@b
-// @b = alias i8, i8 *@g
-//
-// will be converted to:
-// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g
-// @b = alias i8, i8 *@g
-//
-// Eventually this file will implement full alias canonicalation, so that
-// all aliasees are private anonymous values. E.g.
-// @a = alias i8, i8 *@g
-// @g = global i8 0
-//
-// will be converted to:
-// @0 = private global
-// @a = alias i8, i8* @0
-// @g = alias i8, i8* @0
-//
-// This simplifies optimization and ThinLTO linking of the original symbols.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
-
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/ValueHandle.h"
-
-using namespace llvm;
-
-namespace {
-
-static Constant *canonicalizeAlias(Constant *C, bool &Changed) {
- if (auto *GA = dyn_cast<GlobalAlias>(C)) {
- auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed);
- if (NewAliasee != GA->getAliasee()) {
- GA->setAliasee(NewAliasee);
- Changed = true;
- }
- return NewAliasee;
- }
-
- auto *CE = dyn_cast<ConstantExpr>(C);
- if (!CE)
- return C;
-
- std::vector<Constant *> Ops;
- for (Use &U : CE->operands())
- Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed));
- return CE->getWithOperands(Ops);
-}
-
-/// Convert aliases to canonical form.
-static bool canonicalizeAliases(Module &M) {
- bool Changed = false;
- for (auto &GA : M.aliases())
- canonicalizeAlias(&GA, Changed);
- return Changed;
-}
-
-// Legacy pass that canonicalizes aliases.
-class CanonicalizeAliasesLegacyPass : public ModulePass {
-
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- /// Specify pass name for debug output
- StringRef getPassName() const override { return "Canonicalize Aliases"; }
-
- explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override { return canonicalizeAliases(M); }
-};
-char CanonicalizeAliasesLegacyPass::ID = 0;
-
-} // anonymous namespace
-
-PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- if (!canonicalizeAliases(M))
- return PreservedAnalyses::all();
-
- return PreservedAnalyses::none();
-}
-
-INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
- "Canonicalize aliases", false, false)
-INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
- "Canonicalize aliases", false, false)
-
-namespace llvm {
-ModulePass *createCanonicalizeAliasesPass() {
- return new CanonicalizeAliasesLegacyPass();
-}
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
deleted file mode 100644
index 1026c9d37038..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ /dev/null
@@ -1,863 +0,0 @@
-//===- CloneFunction.cpp - Clone a function into another function ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneFunctionInto interface, which is used as the
-// low-level function cloner. This is used by the CloneFunction and function
-// inliner to do the dirty work of copying the body of a function around.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <map>
-using namespace llvm;
-
-/// See comments in Cloning.h.
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
- const Twine &NameSuffix, Function *F,
- ClonedCodeInfo *CodeInfo,
- DebugInfoFinder *DIFinder) {
- DenseMap<const MDNode *, MDNode *> Cache;
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
- if (BB->hasName())
- NewBB->setName(BB->getName() + NameSuffix);
-
- bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
- Module *TheModule = F ? F->getParent() : nullptr;
-
- // Loop over all instructions, and copy them over.
- for (const Instruction &I : *BB) {
- if (DIFinder && TheModule)
- DIFinder->processInstruction(*TheModule, I);
-
- Instruction *NewInst = I.clone();
- if (I.hasName())
- NewInst->setName(I.getName() + NameSuffix);
- NewBB->getInstList().push_back(NewInst);
- VMap[&I] = NewInst; // Add instruction map to value.
-
- hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I));
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
- if (isa<ConstantInt>(AI->getArraySize()))
- hasStaticAllocas = true;
- else
- hasDynamicAllocas = true;
- }
- }
-
- if (CodeInfo) {
- CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
- BB != &BB->getParent()->getEntryBlock();
- }
- return NewBB;
-}
-
-// Clone OldFunc into NewFunc, transforming the old arguments into references to
-// VMap values.
-//
-void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix, ClonedCodeInfo *CodeInfo,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- assert(NameSuffix && "NameSuffix cannot be null!");
-
-#ifndef NDEBUG
- for (const Argument &I : OldFunc->args())
- assert(VMap.count(&I) && "No mapping from source argument specified!");
-#endif
-
- // Copy all attributes other than those stored in the AttributeList. We need
- // to remap the parameter indices of the AttributeList.
- AttributeList NewAttrs = NewFunc->getAttributes();
- NewFunc->copyAttributesFrom(OldFunc);
- NewFunc->setAttributes(NewAttrs);
-
- // Fix up the personality function that got copied over.
- if (OldFunc->hasPersonalityFn())
- NewFunc->setPersonalityFn(
- MapValue(OldFunc->getPersonalityFn(), VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
-
- SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
- AttributeList OldAttrs = OldFunc->getAttributes();
-
- // Clone any argument attributes that are present in the VMap.
- for (const Argument &OldArg : OldFunc->args()) {
- if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
- NewArgAttrs[NewArg->getArgNo()] =
- OldAttrs.getParamAttributes(OldArg.getArgNo());
- }
- }
-
- NewFunc->setAttributes(
- AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
- OldAttrs.getRetAttributes(), NewArgAttrs));
-
- bool MustCloneSP =
- OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
- DISubprogram *SP = OldFunc->getSubprogram();
- if (SP) {
- assert(!MustCloneSP || ModuleLevelChanges);
- // Add mappings for some DebugInfo nodes that we don't want duplicated
- // even if they're distinct.
- auto &MD = VMap.MD();
- MD[SP->getUnit()].reset(SP->getUnit());
- MD[SP->getType()].reset(SP->getType());
- MD[SP->getFile()].reset(SP->getFile());
- // If we're not cloning into the same module, no need to clone the
- // subprogram
- if (!MustCloneSP)
- MD[SP].reset(SP);
- }
-
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- OldFunc->getAllMetadata(MDs);
- for (auto MD : MDs) {
- NewFunc->addMetadata(
- MD.first,
- *MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
- }
-
- // When we remap instructions, we want to avoid duplicating inlined
- // DISubprograms, so record all subprograms we find as we duplicate
- // instructions and then freeze them in the MD map.
- // We also record information about dbg.value and dbg.declare to avoid
- // duplicating the types.
- DebugInfoFinder DIFinder;
-
- // Loop over all of the basic blocks in the function, cloning them as
- // appropriate. Note that we save BE this way in order to handle cloning of
- // recursive functions into themselves.
- //
- for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
- BI != BE; ++BI) {
- const BasicBlock &BB = *BI;
-
- // Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
- ModuleLevelChanges ? &DIFinder : nullptr);
-
- // Add basic block mapping.
- VMap[&BB] = CBB;
-
- // It is only legal to clone a function if a block address within that
- // function is never referenced outside of the function. Given that, we
- // want to map block addresses from the old function to block addresses in
- // the clone. (This is different from the generic ValueMapper
- // implementation, which generates an invalid blockaddress when
- // cloning a function.)
- if (BB.hasAddressTaken()) {
- Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
- const_cast<BasicBlock*>(&BB));
- VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
- }
-
- // Note return instructions for the caller.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
- Returns.push_back(RI);
- }
-
- for (DISubprogram *ISP : DIFinder.subprograms())
- if (ISP != SP)
- VMap.MD()[ISP].reset(ISP);
-
- for (DICompileUnit *CU : DIFinder.compile_units())
- VMap.MD()[CU].reset(CU);
-
- for (DIType *Type : DIFinder.types())
- VMap.MD()[Type].reset(Type);
-
- // Loop over all of the instructions in the function, fixing up operand
- // references as we go. This uses VMap to do all the hard work.
- for (Function::iterator BB =
- cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
- BE = NewFunc->end();
- BB != BE; ++BB)
- // Loop over all instructions, fixing each one as we find it...
- for (Instruction &II : *BB)
- RemapInstruction(&II, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
-}
-
-/// Return a copy of the specified function and add it to that function's
-/// module. Also, any references specified in the VMap are changed to refer to
-/// their mapped value instead of the original one. If any of the arguments to
-/// the function are in the VMap, the arguments are deleted from the resultant
-/// function. The VMap is updated to include mappings from all of the
-/// instructions and basicblocks in the function from their old to new values.
-///
-Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
- ClonedCodeInfo *CodeInfo) {
- std::vector<Type*> ArgTypes;
-
- // The user might be deleting arguments to the function by specifying them in
- // the VMap. If so, we need to not add the arguments to the arg ty vector
- //
- for (const Argument &I : F->args())
- if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
- ArgTypes.push_back(I.getType());
-
- // Create a new function type...
- FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
- ArgTypes, F->getFunctionType()->isVarArg());
-
- // Create the new function...
- Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
- F->getName(), F->getParent());
-
- // Loop over the arguments, copying the names of the mapped arguments over...
- Function::arg_iterator DestI = NewF->arg_begin();
- for (const Argument & I : F->args())
- if (VMap.count(&I) == 0) { // Is this argument preserved?
- DestI->setName(I.getName()); // Copy the name over...
- VMap[&I] = &*DestI++; // Add mapping to VMap
- }
-
- SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
- CodeInfo);
-
- return NewF;
-}
-
-
-
-namespace {
- /// This is a private class used to implement CloneAndPruneFunctionInto.
- struct PruningFunctionCloner {
- Function *NewFunc;
- const Function *OldFunc;
- ValueToValueMapTy &VMap;
- bool ModuleLevelChanges;
- const char *NameSuffix;
- ClonedCodeInfo *CodeInfo;
-
- public:
- PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- ValueToValueMapTy &valueMap, bool moduleLevelChanges,
- const char *nameSuffix, ClonedCodeInfo *codeInfo)
- : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
- ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo) {}
-
- /// The specified block is found to be reachable, clone it and
- /// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
- BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone);
- };
-}
-
-/// The specified block is found to be reachable, clone it and
-/// anything that it can reach.
-void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
- BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone){
- WeakTrackingVH &BBEntry = VMap[BB];
-
- // Have we already cloned this block?
- if (BBEntry) return;
-
- // Nope, clone it now.
- BasicBlock *NewBB;
- BBEntry = NewBB = BasicBlock::Create(BB->getContext());
- if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
-
- // It is only legal to clone a function if a block address within that
- // function is never referenced outside of the function. Given that, we
- // want to map block addresses from the old function to block addresses in
- // the clone. (This is different from the generic ValueMapper
- // implementation, which generates an invalid blockaddress when
- // cloning a function.)
- //
- // Note that we don't need to fix the mapping for unreachable blocks;
- // the default mapping there is safe.
- if (BB->hasAddressTaken()) {
- Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
- const_cast<BasicBlock*>(BB));
- VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
- }
-
- bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
-
- // Loop over all instructions, and copy them over, DCE'ing as we go. This
- // loop doesn't include the terminator.
- for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
- II != IE; ++II) {
-
- Instruction *NewInst = II->clone();
-
- // Eagerly remap operands to the newly cloned instruction, except for PHI
- // nodes for which we defer processing until we update the CFG.
- if (!isa<PHINode>(NewInst)) {
- RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
-
- // If we can simplify this instruction to some other value, simply add
- // a mapping to that value rather than inserting a new instruction into
- // the basic block.
- if (Value *V =
- SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
- // On the off-chance that this simplifies to an instruction in the old
- // function, map it back into the new function.
- if (NewFunc != OldFunc)
- if (Value *MappedV = VMap.lookup(V))
- V = MappedV;
-
- if (!NewInst->mayHaveSideEffects()) {
- VMap[&*II] = V;
- NewInst->deleteValue();
- continue;
- }
- }
- }
-
- if (II->hasName())
- NewInst->setName(II->getName()+NameSuffix);
- VMap[&*II] = NewInst; // Add instruction map to value.
- NewBB->getInstList().push_back(NewInst);
- hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
-
- if (CodeInfo)
- if (auto CS = ImmutableCallSite(&*II))
- if (CS.hasOperandBundles())
- CodeInfo->OperandBundleCallSites.push_back(NewInst);
-
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (isa<ConstantInt>(AI->getArraySize()))
- hasStaticAllocas = true;
- else
- hasDynamicAllocas = true;
- }
- }
-
- // Finally, clone over the terminator.
- const Instruction *OldTI = BB->getTerminator();
- bool TerminatorDone = false;
- if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
- if (BI->isConditional()) {
- // If the condition was a known constant in the callee...
- ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
- // Or is a known constant in the caller...
- if (!Cond) {
- Value *V = VMap.lookup(BI->getCondition());
- Cond = dyn_cast_or_null<ConstantInt>(V);
- }
-
- // Constant fold to uncond branch!
- if (Cond) {
- BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
- VMap[OldTI] = BranchInst::Create(Dest, NewBB);
- ToClone.push_back(Dest);
- TerminatorDone = true;
- }
- }
- } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
- // If switching on a value known constant in the caller.
- ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (!Cond) { // Or known constant after constant prop in the callee...
- Value *V = VMap.lookup(SI->getCondition());
- Cond = dyn_cast_or_null<ConstantInt>(V);
- }
- if (Cond) { // Constant fold to uncond branch!
- SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
- BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
- VMap[OldTI] = BranchInst::Create(Dest, NewBB);
- ToClone.push_back(Dest);
- TerminatorDone = true;
- }
- }
-
- if (!TerminatorDone) {
- Instruction *NewInst = OldTI->clone();
- if (OldTI->hasName())
- NewInst->setName(OldTI->getName()+NameSuffix);
- NewBB->getInstList().push_back(NewInst);
- VMap[OldTI] = NewInst; // Add instruction map to value.
-
- if (CodeInfo)
- if (auto CS = ImmutableCallSite(OldTI))
- if (CS.hasOperandBundles())
- CodeInfo->OperandBundleCallSites.push_back(NewInst);
-
- // Recursively clone any reachable successor blocks.
- const Instruction *TI = BB->getTerminator();
- for (const BasicBlock *Succ : successors(TI))
- ToClone.push_back(Succ);
- }
-
- if (CodeInfo) {
- CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
- BB != &BB->getParent()->front();
- }
-}
-
-/// This works like CloneAndPruneFunctionInto, except that it does not clone the
-/// entire function. Instead it starts at an instruction provided by the caller
-/// and copies (and prunes) only the code reachable from that instruction.
-void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
- const Instruction *StartingInst,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst *> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo) {
- assert(NameSuffix && "NameSuffix cannot be null!");
-
- ValueMapTypeRemapper *TypeMapper = nullptr;
- ValueMaterializer *Materializer = nullptr;
-
-#ifndef NDEBUG
- // If the cloning starts at the beginning of the function, verify that
- // the function arguments are mapped.
- if (!StartingInst)
- for (const Argument &II : OldFunc->args())
- assert(VMap.count(&II) && "No mapping from source argument specified!");
-#endif
-
- PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo);
- const BasicBlock *StartingBB;
- if (StartingInst)
- StartingBB = StartingInst->getParent();
- else {
- StartingBB = &OldFunc->getEntryBlock();
- StartingInst = &StartingBB->front();
- }
-
- // Clone the entry block, and anything recursively reachable from it.
- std::vector<const BasicBlock*> CloneWorklist;
- PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
- while (!CloneWorklist.empty()) {
- const BasicBlock *BB = CloneWorklist.back();
- CloneWorklist.pop_back();
- PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
- }
-
- // Loop over all of the basic blocks in the old function. If the block was
- // reachable, we have cloned it and the old block is now in the value map:
- // insert it into the new function in the right order. If not, ignore it.
- //
- // Defer PHI resolution until rest of function is resolved.
- SmallVector<const PHINode*, 16> PHIToResolve;
- for (const BasicBlock &BI : *OldFunc) {
- Value *V = VMap.lookup(&BI);
- BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
- if (!NewBB) continue; // Dead block.
-
- // Add the new block to the new function.
- NewFunc->getBasicBlockList().push_back(NewBB);
-
- // Handle PHI nodes specially, as we have to remove references to dead
- // blocks.
- for (const PHINode &PN : BI.phis()) {
- // PHI nodes may have been remapped to non-PHI nodes by the caller or
- // during the cloning process.
- if (isa<PHINode>(VMap[&PN]))
- PHIToResolve.push_back(&PN);
- else
- break;
- }
-
- // Finally, remap the terminator instructions, as those can't be remapped
- // until all BBs are mapped.
- RemapInstruction(NewBB->getTerminator(), VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
- }
-
- // Defer PHI resolution until rest of function is resolved, PHI resolution
- // requires the CFG to be up-to-date.
- for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
- const PHINode *OPN = PHIToResolve[phino];
- unsigned NumPreds = OPN->getNumIncomingValues();
- const BasicBlock *OldBB = OPN->getParent();
- BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
-
- // Map operands for blocks that are live and remove operands for blocks
- // that are dead.
- for (; phino != PHIToResolve.size() &&
- PHIToResolve[phino]->getParent() == OldBB; ++phino) {
- OPN = PHIToResolve[phino];
- PHINode *PN = cast<PHINode>(VMap[OPN]);
- for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
- Value *V = VMap.lookup(PN->getIncomingBlock(pred));
- if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
- Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
- assert(InVal && "Unknown input value?");
- PN->setIncomingValue(pred, InVal);
- PN->setIncomingBlock(pred, MappedBlock);
- } else {
- PN->removeIncomingValue(pred, false);
- --pred; // Revisit the next entry.
- --e;
- }
- }
- }
-
- // The loop above has removed PHI entries for those blocks that are dead
- // and has updated others. However, if a block is live (i.e. copied over)
- // but its terminator has been changed to not go to this block, then our
- // phi nodes will have invalid entries. Update the PHI nodes in this
- // case.
- PHINode *PN = cast<PHINode>(NewBB->begin());
- NumPreds = pred_size(NewBB);
- if (NumPreds != PN->getNumIncomingValues()) {
- assert(NumPreds < PN->getNumIncomingValues());
- // Count how many times each predecessor comes to this block.
- std::map<BasicBlock*, unsigned> PredCount;
- for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
- PI != E; ++PI)
- --PredCount[*PI];
-
- // Figure out how many entries to remove from each PHI.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- ++PredCount[PN->getIncomingBlock(i)];
-
- // At this point, the excess predecessor entries are positive in the
- // map. Loop over all of the PHIs and remove excess predecessor
- // entries.
- BasicBlock::iterator I = NewBB->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- for (const auto &PCI : PredCount) {
- BasicBlock *Pred = PCI.first;
- for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
- PN->removeIncomingValue(Pred, false);
- }
- }
- }
-
- // If the loops above have made these phi nodes have 0 or 1 operand,
- // replace them with undef or the input value. We must do this for
- // correctness, because 0-operand phis are not valid.
- PN = cast<PHINode>(NewBB->begin());
- if (PN->getNumIncomingValues() == 0) {
- BasicBlock::iterator I = NewBB->begin();
- BasicBlock::const_iterator OldI = OldBB->begin();
- while ((PN = dyn_cast<PHINode>(I++))) {
- Value *NV = UndefValue::get(PN->getType());
- PN->replaceAllUsesWith(NV);
- assert(VMap[&*OldI] == PN && "VMap mismatch");
- VMap[&*OldI] = NV;
- PN->eraseFromParent();
- ++OldI;
- }
- }
- }
-
- // Make a second pass over the PHINodes now that all of them have been
- // remapped into the new function, simplifying the PHINode and performing any
- // recursive simplifications exposed. This will transparently update the
- // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
- // two PHINodes, the iteration over the old PHIs remains valid, and the
- // mapping will just map us to the new node (which may not even be a PHI
- // node).
- const DataLayout &DL = NewFunc->getParent()->getDataLayout();
- SmallSetVector<const Value *, 8> Worklist;
- for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
- if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
- Worklist.insert(PHIToResolve[Idx]);
-
- // Note that we must test the size on each iteration, the worklist can grow.
- for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
- const Value *OrigV = Worklist[Idx];
- auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
- if (!I)
- continue;
-
- // Skip over non-intrinsic callsites, we don't want to remove any nodes from
- // the CGSCC.
- CallSite CS = CallSite(I);
- if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
- continue;
-
- // See if this instruction simplifies.
- Value *SimpleV = SimplifyInstruction(I, DL);
- if (!SimpleV)
- continue;
-
- // Stash away all the uses of the old instruction so we can check them for
- // recursive simplifications after a RAUW. This is cheaper than checking all
- // uses of To on the recursive step in most cases.
- for (const User *U : OrigV->users())
- Worklist.insert(cast<Instruction>(U));
-
- // Replace the instruction with its simplified value.
- I->replaceAllUsesWith(SimpleV);
-
- // If the original instruction had no side effects, remove it.
- if (isInstructionTriviallyDead(I))
- I->eraseFromParent();
- else
- VMap[OrigV] = I;
- }
-
- // Now that the inlined function body has been fully constructed, go through
- // and zap unconditional fall-through branches. This happens all the time when
- // specializing code: code specialization turns conditional branches into
- // uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
- Function::iterator I = Begin;
- while (I != NewFunc->end()) {
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- //
- // Do the folding before we check if the block is dead since we want code
- // like
- // bb:
- // br i1 undef, label %bb, label %bb
- // to be simplified to
- // bb:
- // br label %bb
- // before we call I->getSinglePredecessor().
- ConstantFoldTerminator(&*I);
-
- // Check if this block has become dead during inlining or other
- // simplifications. Note that the first block will appear dead, as it has
- // not yet been wired up properly.
- if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
- I->getSinglePredecessor() == &*I)) {
- BasicBlock *DeadBB = &*I++;
- DeleteDeadBlock(DeadBB);
- continue;
- }
-
- BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
- if (!BI || BI->isConditional()) { ++I; continue; }
-
- BasicBlock *Dest = BI->getSuccessor(0);
- if (!Dest->getSinglePredecessor()) {
- ++I; continue;
- }
-
- // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
- // above should have zapped all of them..
- assert(!isa<PHINode>(Dest->begin()));
-
- // We know all single-entry PHI nodes in the inlined function have been
- // removed, so we just need to splice the blocks.
- BI->eraseFromParent();
-
- // Make all PHI nodes that referred to Dest now refer to I as their source.
- Dest->replaceAllUsesWith(&*I);
-
- // Move all the instructions in the succ to the pred.
- I->getInstList().splice(I->end(), Dest->getInstList());
-
- // Remove the dest block.
- Dest->eraseFromParent();
-
- // Do not increment I, iteratively merge all things this block branches to.
- }
-
- // Make a final pass over the basic blocks from the old function to gather
- // any return instructions which survived folding. We have to do this here
- // because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
- E = NewFunc->end();
- I != E; ++I)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
- Returns.push_back(RI);
-}
-
-
-/// This works exactly like CloneFunctionInto,
-/// except that it does some simple constant prop and DCE on the fly. The
-/// effect of this is to copy significantly less code in cases where (for
-/// example) a function call with constant arguments is inlined, and those
-/// constant arguments cause a significant amount of code in the callee to be
-/// dead. Since this doesn't produce an exact copy of the input, it can't be
-/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- Instruction *TheCall) {
- CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
-}
-
-/// Remaps instructions in \p Blocks using the mapping in \p VMap.
-void llvm::remapInstructionsInBlocks(
- const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
- // Rewrite the code to refer to itself.
- for (auto *BB : Blocks)
- for (auto &Inst : *BB)
- RemapInstruction(&Inst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-}
-
-/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
-/// Blocks.
-///
-/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
-/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
-Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
- Loop *OrigLoop, ValueToValueMapTy &VMap,
- const Twine &NameSuffix, LoopInfo *LI,
- DominatorTree *DT,
- SmallVectorImpl<BasicBlock *> &Blocks) {
- Function *F = OrigLoop->getHeader()->getParent();
- Loop *ParentLoop = OrigLoop->getParentLoop();
- DenseMap<Loop *, Loop *> LMap;
-
- Loop *NewLoop = LI->AllocateLoop();
- LMap[OrigLoop] = NewLoop;
- if (ParentLoop)
- ParentLoop->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
-
- BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
- assert(OrigPH && "No preheader");
- BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
- // To rename the loop PHIs.
- VMap[OrigPH] = NewPH;
- Blocks.push_back(NewPH);
-
- // Update LoopInfo.
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewPH, *LI);
-
- // Update DominatorTree.
- DT->addNewBlock(NewPH, LoopDomBB);
-
- for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
- Loop *&NewLoop = LMap[CurLoop];
- if (!NewLoop) {
- NewLoop = LI->AllocateLoop();
-
- // Establish the parent/child relationship.
- Loop *OrigParent = CurLoop->getParentLoop();
- assert(OrigParent && "Could not find the original parent loop");
- Loop *NewParentLoop = LMap[OrigParent];
- assert(NewParentLoop && "Could not find the new parent loop");
-
- NewParentLoop->addChildLoop(NewLoop);
- }
- }
-
- for (BasicBlock *BB : OrigLoop->getBlocks()) {
- Loop *CurLoop = LI->getLoopFor(BB);
- Loop *&NewLoop = LMap[CurLoop];
- assert(NewLoop && "Expecting new loop to be allocated");
-
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
- VMap[BB] = NewBB;
-
- // Update LoopInfo.
- NewLoop->addBasicBlockToLoop(NewBB, *LI);
- if (BB == CurLoop->getHeader())
- NewLoop->moveToHeader(NewBB);
-
- // Add DominatorTree node. After seeing all blocks, update to correct
- // IDom.
- DT->addNewBlock(NewBB, NewPH);
-
- Blocks.push_back(NewBB);
- }
-
- for (BasicBlock *BB : OrigLoop->getBlocks()) {
- // Update DominatorTree.
- BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
- DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
- cast<BasicBlock>(VMap[IDomBB]));
- }
-
- // Move them physically from the end of the block list.
- F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
- NewPH);
- F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
- NewLoop->getHeader()->getIterator(), F->end());
-
- return NewLoop;
-}
-
-/// Duplicate non-Phi instructions from the beginning of block up to
-/// StopAt instruction into a split block between BB and its predecessor.
-BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
- BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt,
- ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) {
-
- assert(count(successors(PredBB), BB) == 1 &&
- "There must be a single edge between PredBB and BB!");
- // We are going to have to map operands from the original BB block to the new
- // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
- // account for entry from PredBB.
- BasicBlock::iterator BI = BB->begin();
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
- BasicBlock *NewBB = SplitEdge(PredBB, BB);
- NewBB->setName(PredBB->getName() + ".split");
- Instruction *NewTerm = NewBB->getTerminator();
-
- // FIXME: SplitEdge does not yet take a DTU, so we include the split edge
- // in the update set here.
- DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB},
- {DominatorTree::Insert, PredBB, NewBB},
- {DominatorTree::Insert, NewBB, BB}});
-
- // Clone the non-phi instructions of BB into NewBB, keeping track of the
- // mapping and using it to remap operands in the cloned instructions.
- // Stop once we see the terminator too. This covers the case where BB's
- // terminator gets replaced and StopAt == BB's terminator.
- for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- New->insertBefore(NewTerm);
- ValueMapping[&*BI] = New;
-
- // Remap operands to patch up intra-block references.
- for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
- if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
- auto I = ValueMapping.find(Inst);
- if (I != ValueMapping.end())
- New->setOperand(i, I->second);
- }
- }
-
- return NewBB;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
deleted file mode 100644
index 7ddf59becba9..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//===- CloneModule.cpp - Clone an entire module ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneModule interface which makes a copy of an
-// entire module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-using namespace llvm;
-
-static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
- const Comdat *SC = Src->getComdat();
- if (!SC)
- return;
- Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
- DC->setSelectionKind(SC->getSelectionKind());
- Dst->setComdat(DC);
-}
-
-/// This is not as easy as it might seem because we have to worry about making
-/// copies of global variables and functions, and making their (initializers and
-/// references, respectively) refer to the right globals.
-///
-std::unique_ptr<Module> llvm::CloneModule(const Module &M) {
- // Create the value map that maps things from the old module over to the new
- // module.
- ValueToValueMapTy VMap;
- return CloneModule(M, VMap);
-}
-
-std::unique_ptr<Module> llvm::CloneModule(const Module &M,
- ValueToValueMapTy &VMap) {
- return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });
-}
-
-std::unique_ptr<Module> llvm::CloneModule(
- const Module &M, ValueToValueMapTy &VMap,
- function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
- // First off, we need to create the new module.
- std::unique_ptr<Module> New =
- llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext());
- New->setSourceFileName(M.getSourceFileName());
- New->setDataLayout(M.getDataLayout());
- New->setTargetTriple(M.getTargetTriple());
- New->setModuleInlineAsm(M.getModuleInlineAsm());
-
- // Loop over all of the global variables, making corresponding globals in the
- // new module. Here we add them to the VMap and to the new Module. We
- // don't worry about attributes or initializers, they will come later.
- //
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- GlobalVariable *GV = new GlobalVariable(*New,
- I->getValueType(),
- I->isConstant(), I->getLinkage(),
- (Constant*) nullptr, I->getName(),
- (GlobalVariable*) nullptr,
- I->getThreadLocalMode(),
- I->getType()->getAddressSpace());
- GV->copyAttributesFrom(&*I);
- VMap[&*I] = GV;
- }
-
- // Loop over the functions in the module, making external functions as before
- for (const Function &I : M) {
- Function *NF =
- Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(),
- I.getAddressSpace(), I.getName(), New.get());
- NF->copyAttributesFrom(&I);
- VMap[&I] = NF;
- }
-
- // Loop over the aliases in the module
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- if (!ShouldCloneDefinition(&*I)) {
- // An alias cannot act as an external reference, so we need to create
- // either a function or a global variable depending on the value type.
- // FIXME: Once pointee types are gone we can probably pick one or the
- // other.
- GlobalValue *GV;
- if (I->getValueType()->isFunctionTy())
- GV = Function::Create(cast<FunctionType>(I->getValueType()),
- GlobalValue::ExternalLinkage,
- I->getAddressSpace(), I->getName(), New.get());
- else
- GV = new GlobalVariable(
- *New, I->getValueType(), false, GlobalValue::ExternalLinkage,
- nullptr, I->getName(), nullptr,
- I->getThreadLocalMode(), I->getType()->getAddressSpace());
- VMap[&*I] = GV;
- // We do not copy attributes (mainly because copying between different
- // kinds of globals is forbidden), but this is generally not required for
- // correctness.
- continue;
- }
- auto *GA = GlobalAlias::create(I->getValueType(),
- I->getType()->getPointerAddressSpace(),
- I->getLinkage(), I->getName(), New.get());
- GA->copyAttributesFrom(&*I);
- VMap[&*I] = GA;
- }
-
- // Now that all of the things that global variable initializer can refer to
- // have been created, loop through and copy the global variable referrers
- // over... We also set the attributes on the global now.
- //
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (I->isDeclaration())
- continue;
-
- GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
- if (!ShouldCloneDefinition(&*I)) {
- // Skip after setting the correct linkage for an external reference.
- GV->setLinkage(GlobalValue::ExternalLinkage);
- continue;
- }
- if (I->hasInitializer())
- GV->setInitializer(MapValue(I->getInitializer(), VMap));
-
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- I->getAllMetadata(MDs);
- for (auto MD : MDs)
- GV->addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
-
- copyComdat(GV, &*I);
- }
-
- // Similarly, copy over function bodies now...
- //
- for (const Function &I : M) {
- if (I.isDeclaration())
- continue;
-
- Function *F = cast<Function>(VMap[&I]);
- if (!ShouldCloneDefinition(&I)) {
- // Skip after setting the correct linkage for an external reference.
- F->setLinkage(GlobalValue::ExternalLinkage);
- // Personality function is not valid on a declaration.
- F->setPersonalityFn(nullptr);
- continue;
- }
-
- Function::arg_iterator DestI = F->arg_begin();
- for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end();
- ++J) {
- DestI->setName(J->getName());
- VMap[&*J] = &*DestI++;
- }
-
- SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns);
-
- if (I.hasPersonalityFn())
- F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
-
- copyComdat(F, &I);
- }
-
- // And aliases
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- // We already dealt with undefined aliases above.
- if (!ShouldCloneDefinition(&*I))
- continue;
- GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]);
- if (const Constant *C = I->getAliasee())
- GA->setAliasee(MapValue(C, VMap));
- }
-
- // And named metadata....
- for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; ++I) {
- const NamedMDNode &NMD = *I;
- NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
- for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
- }
-
- return New;
-}
-
-extern "C" {
-
-LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
- return wrap(CloneModule(*unwrap(M)).release());
-}
-
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
deleted file mode 100644
index fa6d3f8ae873..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ /dev/null
@@ -1,1567 +0,0 @@
-//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interface to tear out a code region, such as an
-// individual loop or a parallel section, into a new function, replacing it with
-// a call to the new function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <map>
-#include <set>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-using ProfileCount = Function::ProfileCount;
-
-#define DEBUG_TYPE "code-extractor"
-
-// Provide a command-line option to aggregate function arguments into a struct
-// for functions produced by the code extractor. This is useful when converting
-// extracted functions to pthread-based code, as only one argument (void*) can
-// be passed in to pthread_create().
-static cl::opt<bool>
-AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
- cl::desc("Aggregate arguments to code-extracted functions"));
-
-/// Test whether a block is valid for extraction.
-static bool isBlockValidForExtraction(const BasicBlock &BB,
- const SetVector<BasicBlock *> &Result,
- bool AllowVarArgs, bool AllowAlloca) {
- // taking the address of a basic block moved to another function is illegal
- if (BB.hasAddressTaken())
- return false;
-
- // don't hoist code that uses another basicblock address, as it's likely to
- // lead to unexpected behavior, like cross-function jumps
- SmallPtrSet<User const *, 16> Visited;
- SmallVector<User const *, 16> ToVisit;
-
- for (Instruction const &Inst : BB)
- ToVisit.push_back(&Inst);
-
- while (!ToVisit.empty()) {
- User const *Curr = ToVisit.pop_back_val();
- if (!Visited.insert(Curr).second)
- continue;
- if (isa<BlockAddress const>(Curr))
- return false; // even a reference to self is likely to be not compatible
-
- if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
- continue;
-
- for (auto const &U : Curr->operands()) {
- if (auto *UU = dyn_cast<User>(U))
- ToVisit.push_back(UU);
- }
- }
-
- // If explicitly requested, allow vastart and alloca. For invoke instructions
- // verify that extraction is valid.
- for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
- if (isa<AllocaInst>(I)) {
- if (!AllowAlloca)
- return false;
- continue;
- }
-
- if (const auto *II = dyn_cast<InvokeInst>(I)) {
- // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
- // must be a part of the subgraph which is being extracted.
- if (auto *UBB = II->getUnwindDest())
- if (!Result.count(UBB))
- return false;
- continue;
- }
-
- // All catch handlers of a catchswitch instruction as well as the unwind
- // destination must be in the subgraph.
- if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
- if (auto *UBB = CSI->getUnwindDest())
- if (!Result.count(UBB))
- return false;
- for (auto *HBB : CSI->handlers())
- if (!Result.count(const_cast<BasicBlock*>(HBB)))
- return false;
- continue;
- }
-
- // Make sure that entire catch handler is within subgraph. It is sufficient
- // to check that catch return's block is in the list.
- if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
- for (const auto *U : CPI->users())
- if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
- if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
- return false;
- continue;
- }
-
- // And do similar checks for cleanup handler - the entire handler must be
- // in subgraph which is going to be extracted. For cleanup return should
- // additionally check that the unwind destination is also in the subgraph.
- if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
- for (const auto *U : CPI->users())
- if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
- if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
- return false;
- continue;
- }
- if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
- if (auto *UBB = CRI->getUnwindDest())
- if (!Result.count(UBB))
- return false;
- continue;
- }
-
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (const Function *F = CI->getCalledFunction()) {
- auto IID = F->getIntrinsicID();
- if (IID == Intrinsic::vastart) {
- if (AllowVarArgs)
- continue;
- else
- return false;
- }
-
- // Currently, we miscompile outlined copies of eh_typid_for. There are
- // proposals for fixing this in llvm.org/PR39545.
- if (IID == Intrinsic::eh_typeid_for)
- return false;
- }
- }
- }
-
- return true;
-}
-
-/// Build a set of blocks to extract if the input blocks are viable.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
- bool AllowVarArgs, bool AllowAlloca) {
- assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
- SetVector<BasicBlock *> Result;
-
- // Loop over the blocks, adding them to our set-vector, and aborting with an
- // empty set if we encounter invalid blocks.
- for (BasicBlock *BB : BBs) {
- // If this block is dead, don't process it.
- if (DT && !DT->isReachableFromEntry(BB))
- continue;
-
- if (!Result.insert(BB))
- llvm_unreachable("Repeated basic blocks in extraction input");
- }
-
- LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
- << '\n');
-
- for (auto *BB : Result) {
- if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
- return {};
-
- // Make sure that the first block is not a landing pad.
- if (BB == Result.front()) {
- if (BB->isEHPad()) {
- LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
- return {};
- }
- continue;
- }
-
- // All blocks other than the first must not have predecessors outside of
- // the subgraph which is being extracted.
- for (auto *PBB : predecessors(BB))
- if (!Result.count(PBB)) {
- LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
- "outside the region except for the first block!\n"
- << "Problematic source BB: " << BB->getName() << "\n"
- << "Problematic destination BB: " << PBB->getName()
- << "\n");
- return {};
- }
- }
-
- return Result;
-}
-
-CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
- bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, AssumptionCache *AC,
- bool AllowVarArgs, bool AllowAlloca,
- std::string Suffix)
- : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
- Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
- Suffix(Suffix) {}
-
-CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
- BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, AssumptionCache *AC,
- std::string Suffix)
- : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AC(AC), AllowVarArgs(false),
- Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
- /* AllowVarArgs */ false,
- /* AllowAlloca */ false)),
- Suffix(Suffix) {}
-
-/// definedInRegion - Return true if the specified value is defined in the
-/// extracted region.
-static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (Blocks.count(I->getParent()))
- return true;
- return false;
-}
-
-/// definedInCaller - Return true if the specified value is defined in the
-/// function being code extracted, but not in the region being extracted.
-/// These values must be passed in as live-ins to the function.
-static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
- if (isa<Argument>(V)) return true;
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (!Blocks.count(I->getParent()))
- return true;
- return false;
-}
-
-static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
- BasicBlock *CommonExitBlock = nullptr;
- auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
- for (auto *Succ : successors(Block)) {
- // Internal edges, ok.
- if (Blocks.count(Succ))
- continue;
- if (!CommonExitBlock) {
- CommonExitBlock = Succ;
- continue;
- }
- if (CommonExitBlock == Succ)
- continue;
-
- return true;
- }
- return false;
- };
-
- if (any_of(Blocks, hasNonCommonExitSucc))
- return nullptr;
-
- return CommonExitBlock;
-}
-
-bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
- Instruction *Addr) const {
- AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
- Function *Func = (*Blocks.begin())->getParent();
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
- continue;
- for (Instruction &II : BB) {
- if (isa<DbgInfoIntrinsic>(II))
- continue;
-
- unsigned Opcode = II.getOpcode();
- Value *MemAddr = nullptr;
- switch (Opcode) {
- case Instruction::Store:
- case Instruction::Load: {
- if (Opcode == Instruction::Store) {
- StoreInst *SI = cast<StoreInst>(&II);
- MemAddr = SI->getPointerOperand();
- } else {
- LoadInst *LI = cast<LoadInst>(&II);
- MemAddr = LI->getPointerOperand();
- }
- // Global variable can not be aliased with locals.
- if (dyn_cast<Constant>(MemAddr))
- break;
- Value *Base = MemAddr->stripInBoundsConstantOffsets();
- if (!isa<AllocaInst>(Base) || Base == AI)
- return false;
- break;
- }
- default: {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
- if (IntrInst) {
- if (IntrInst->isLifetimeStartOrEnd())
- break;
- return false;
- }
- // Treat all the other cases conservatively if it has side effects.
- if (II.mayHaveSideEffects())
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-BasicBlock *
-CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
- BasicBlock *SinglePredFromOutlineRegion = nullptr;
- assert(!Blocks.count(CommonExitBlock) &&
- "Expect a block outside the region!");
- for (auto *Pred : predecessors(CommonExitBlock)) {
- if (!Blocks.count(Pred))
- continue;
- if (!SinglePredFromOutlineRegion) {
- SinglePredFromOutlineRegion = Pred;
- } else if (SinglePredFromOutlineRegion != Pred) {
- SinglePredFromOutlineRegion = nullptr;
- break;
- }
- }
-
- if (SinglePredFromOutlineRegion)
- return SinglePredFromOutlineRegion;
-
-#ifndef NDEBUG
- auto getFirstPHI = [](BasicBlock *BB) {
- BasicBlock::iterator I = BB->begin();
- PHINode *FirstPhi = nullptr;
- while (I != BB->end()) {
- PHINode *Phi = dyn_cast<PHINode>(I);
- if (!Phi)
- break;
- if (!FirstPhi) {
- FirstPhi = Phi;
- break;
- }
- }
- return FirstPhi;
- };
- // If there are any phi nodes, the single pred either exists or has already
- // be created before code extraction.
- assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
-#endif
-
- BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
- CommonExitBlock->getFirstNonPHI()->getIterator());
-
- for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
- PI != PE;) {
- BasicBlock *Pred = *PI++;
- if (Blocks.count(Pred))
- continue;
- Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
- }
- // Now add the old exit block to the outline region.
- Blocks.insert(CommonExitBlock);
- return CommonExitBlock;
-}
-
-// Find the pair of life time markers for address 'Addr' that are either
-// defined inside the outline region or can legally be shrinkwrapped into the
-// outline region. If there are not other untracked uses of the address, return
-// the pair of markers if found; otherwise return a pair of nullptr.
-CodeExtractor::LifetimeMarkerInfo
-CodeExtractor::getLifetimeMarkers(Instruction *Addr,
- BasicBlock *ExitBlock) const {
- LifetimeMarkerInfo Info;
-
- for (User *U : Addr->users()) {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
- if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
- // Do not handle the case where Addr has multiple start markers.
- if (Info.LifeStart)
- return {};
- Info.LifeStart = IntrInst;
- }
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
- if (Info.LifeEnd)
- return {};
- Info.LifeEnd = IntrInst;
- }
- continue;
- }
- // Find untracked uses of the address, bail.
- if (!definedInRegion(Blocks, U))
- return {};
- }
-
- if (!Info.LifeStart || !Info.LifeEnd)
- return {};
-
- Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
- Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
- // Do legality check.
- if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
- !isLegalToShrinkwrapLifetimeMarkers(Addr))
- return {};
-
- // Check to see if we have a place to do hoisting, if not, bail.
- if (Info.HoistLifeEnd && !ExitBlock)
- return {};
-
- return Info;
-}
-
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
- BasicBlock *&ExitBlock) const {
- Function *Func = (*Blocks.begin())->getParent();
- ExitBlock = getCommonExitBlock(Blocks);
-
- auto moveOrIgnoreLifetimeMarkers =
- [&](const LifetimeMarkerInfo &LMI) -> bool {
- if (!LMI.LifeStart)
- return false;
- if (LMI.SinkLifeStart) {
- LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
- << "\n");
- SinkCands.insert(LMI.LifeStart);
- }
- if (LMI.HoistLifeEnd) {
- LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
- HoistCands.insert(LMI.LifeEnd);
- }
- return true;
- };
-
- for (BasicBlock &BB : *Func) {
- if (Blocks.count(&BB))
- continue;
- for (Instruction &II : BB) {
- auto *AI = dyn_cast<AllocaInst>(&II);
- if (!AI)
- continue;
-
- LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
- bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
- if (Moved) {
- LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
- SinkCands.insert(AI);
- continue;
- }
-
- // Follow any bitcasts.
- SmallVector<Instruction *, 2> Bitcasts;
- SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
- for (User *U : AI->users()) {
- if (U->stripInBoundsConstantOffsets() == AI) {
- Instruction *Bitcast = cast<Instruction>(U);
- LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
- if (LMI.LifeStart) {
- Bitcasts.push_back(Bitcast);
- BitcastLifetimeInfo.push_back(LMI);
- continue;
- }
- }
-
- // Found unknown use of AI.
- if (!definedInRegion(Blocks, U)) {
- Bitcasts.clear();
- break;
- }
- }
-
- // Either no bitcasts reference the alloca or there are unknown uses.
- if (Bitcasts.empty())
- continue;
-
- LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
- SinkCands.insert(AI);
- for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
- Instruction *BitcastAddr = Bitcasts[I];
- const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
- assert(LMI.LifeStart &&
- "Unsafe to sink bitcast without lifetime markers");
- moveOrIgnoreLifetimeMarkers(LMI);
- if (!definedInRegion(Blocks, BitcastAddr)) {
- LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
- << "\n");
- SinkCands.insert(BitcastAddr);
- }
- }
- }
- }
-}
-
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
- const ValueSet &SinkCands) const {
- for (BasicBlock *BB : Blocks) {
- // If a used value is defined outside the region, it's an input. If an
- // instruction is used outside the region, it's an output.
- for (Instruction &II : *BB) {
- for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI) {
- Value *V = *OI;
- if (!SinkCands.count(V) && definedInCaller(Blocks, V))
- Inputs.insert(V);
- }
-
- for (User *U : II.users())
- if (!definedInRegion(Blocks, U)) {
- Outputs.insert(&II);
- break;
- }
- }
- }
-}
-
-/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
-/// of the region, we need to split the entry block of the region so that the
-/// PHI node is easier to deal with.
-void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
- unsigned NumPredsFromRegion = 0;
- unsigned NumPredsOutsideRegion = 0;
-
- if (Header != &Header->getParent()->getEntryBlock()) {
- PHINode *PN = dyn_cast<PHINode>(Header->begin());
- if (!PN) return; // No PHI nodes.
-
- // If the header node contains any PHI nodes, check to see if there is more
- // than one entry from outside the region. If so, we need to sever the
- // header block into two.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (Blocks.count(PN->getIncomingBlock(i)))
- ++NumPredsFromRegion;
- else
- ++NumPredsOutsideRegion;
-
- // If there is one (or fewer) predecessor from outside the region, we don't
- // need to do anything special.
- if (NumPredsOutsideRegion <= 1) return;
- }
-
- // Otherwise, we need to split the header block into two pieces: one
- // containing PHI nodes merging values from outside of the region, and a
- // second that contains all of the code for the block and merges back any
- // incoming values from inside of the region.
- BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
-
- // We only want to code extract the second block now, and it becomes the new
- // header of the region.
- BasicBlock *OldPred = Header;
- Blocks.remove(OldPred);
- Blocks.insert(NewBB);
- Header = NewBB;
-
- // Okay, now we need to adjust the PHI nodes and any branches from within the
- // region to go to the new header block instead of the old header block.
- if (NumPredsFromRegion) {
- PHINode *PN = cast<PHINode>(OldPred->begin());
- // Loop over all of the predecessors of OldPred that are in the region,
- // changing them to branch to NewBB instead.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (Blocks.count(PN->getIncomingBlock(i))) {
- Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
- TI->replaceUsesOfWith(OldPred, NewBB);
- }
-
- // Okay, everything within the region is now branching to the right block, we
- // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
- BasicBlock::iterator AfterPHIs;
- for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
- PHINode *PN = cast<PHINode>(AfterPHIs);
- // Create a new PHI node in the new region, which has an incoming value
- // from OldPred of PN.
- PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
- PN->getName() + ".ce", &NewBB->front());
- PN->replaceAllUsesWith(NewPN);
- NewPN->addIncoming(PN, OldPred);
-
- // Loop over all of the incoming value in PN, moving them to NewPN if they
- // are from the extracted region.
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- if (Blocks.count(PN->getIncomingBlock(i))) {
- NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
- PN->removeIncomingValue(i);
- --i;
- }
- }
- }
- }
-}
-
-/// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
-/// outlined region, we split these PHIs on two: one with inputs from region
-/// and other with remaining incoming blocks; then first PHIs are placed in
-/// outlined region.
-void CodeExtractor::severSplitPHINodesOfExits(
- const SmallPtrSetImpl<BasicBlock *> &Exits) {
- for (BasicBlock *ExitBB : Exits) {
- BasicBlock *NewBB = nullptr;
-
- for (PHINode &PN : ExitBB->phis()) {
- // Find all incoming values from the outlining region.
- SmallVector<unsigned, 2> IncomingVals;
- for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
- if (Blocks.count(PN.getIncomingBlock(i)))
- IncomingVals.push_back(i);
-
- // Do not process PHI if there is one (or fewer) predecessor from region.
- // If PHI has exactly one predecessor from region, only this one incoming
- // will be replaced on codeRepl block, so it should be safe to skip PHI.
- if (IncomingVals.size() <= 1)
- continue;
-
- // Create block for new PHIs and add it to the list of outlined if it
- // wasn't done before.
- if (!NewBB) {
- NewBB = BasicBlock::Create(ExitBB->getContext(),
- ExitBB->getName() + ".split",
- ExitBB->getParent(), ExitBB);
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
- pred_end(ExitBB));
- for (BasicBlock *PredBB : Preds)
- if (Blocks.count(PredBB))
- PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
- BranchInst::Create(ExitBB, NewBB);
- Blocks.insert(NewBB);
- }
-
- // Split this PHI.
- PHINode *NewPN =
- PHINode::Create(PN.getType(), IncomingVals.size(),
- PN.getName() + ".ce", NewBB->getFirstNonPHI());
- for (unsigned i : IncomingVals)
- NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
- for (unsigned i : reverse(IncomingVals))
- PN.removeIncomingValue(i, false);
- PN.addIncoming(NewPN, NewBB);
- }
- }
-}
-
-void CodeExtractor::splitReturnBlocks() {
- for (BasicBlock *Block : Blocks)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
- BasicBlock *New =
- Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
- if (DT) {
- // Old dominates New. New node dominates all other nodes dominated
- // by Old.
- DomTreeNode *OldNode = DT->getNode(Block);
- SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
- OldNode->end());
-
- DomTreeNode *NewNode = DT->addNewBlock(New, Block);
-
- for (DomTreeNode *I : Children)
- DT->changeImmediateDominator(I, NewNode);
- }
- }
-}
-
-/// constructFunction - make a function based on inputs and outputs, as follows:
-/// f(in0, ..., inN, out0, ..., outN)
-Function *CodeExtractor::constructFunction(const ValueSet &inputs,
- const ValueSet &outputs,
- BasicBlock *header,
- BasicBlock *newRootNode,
- BasicBlock *newHeader,
- Function *oldFunction,
- Module *M) {
- LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
- LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
-
- // This function returns unsigned, outputs will go back by reference.
- switch (NumExitBlocks) {
- case 0:
- case 1: RetTy = Type::getVoidTy(header->getContext()); break;
- case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
- default: RetTy = Type::getInt16Ty(header->getContext()); break;
- }
-
- std::vector<Type *> paramTy;
-
- // Add the types of the input values to the function's argument list
- for (Value *value : inputs) {
- LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
- paramTy.push_back(value->getType());
- }
-
- // Add the types of the output values to the function's argument list.
- for (Value *output : outputs) {
- LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
- if (AggregateArgs)
- paramTy.push_back(output->getType());
- else
- paramTy.push_back(PointerType::getUnqual(output->getType()));
- }
-
- LLVM_DEBUG({
- dbgs() << "Function type: " << *RetTy << " f(";
- for (Type *i : paramTy)
- dbgs() << *i << ", ";
- dbgs() << ")\n";
- });
-
- StructType *StructTy;
- if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- StructTy = StructType::get(M->getContext(), paramTy);
- paramTy.clear();
- paramTy.push_back(PointerType::getUnqual(StructTy));
- }
- FunctionType *funcType =
- FunctionType::get(RetTy, paramTy,
- AllowVarArgs && oldFunction->isVarArg());
-
- std::string SuffixToUse =
- Suffix.empty()
- ? (header->getName().empty() ? "extracted" : header->getName().str())
- : Suffix;
- // Create the new function
- Function *newFunction = Function::Create(
- funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
- oldFunction->getName() + "." + SuffixToUse, M);
- // If the old function is no-throw, so is the new one.
- if (oldFunction->doesNotThrow())
- newFunction->setDoesNotThrow();
-
- // Inherit the uwtable attribute if we need to.
- if (oldFunction->hasUWTable())
- newFunction->setHasUWTable();
-
- // Inherit all of the target dependent attributes and white-listed
- // target independent attributes.
- // (e.g. If the extracted region contains a call to an x86.sse
- // instruction we need to make sure that the extracted region has the
- // "target-features" attribute allowing it to be lowered.
- // FIXME: This should be changed to check to see if a specific
- // attribute can not be inherited.
- for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
- if (Attr.isStringAttribute()) {
- if (Attr.getKindAsString() == "thunk")
- continue;
- } else
- switch (Attr.getKindAsEnum()) {
- // Those attributes cannot be propagated safely. Explicitly list them
- // here so we get a warning if new attributes are added. This list also
- // includes non-function attributes.
- case Attribute::Alignment:
- case Attribute::AllocSize:
- case Attribute::ArgMemOnly:
- case Attribute::Builtin:
- case Attribute::ByVal:
- case Attribute::Convergent:
- case Attribute::Dereferenceable:
- case Attribute::DereferenceableOrNull:
- case Attribute::InAlloca:
- case Attribute::InReg:
- case Attribute::InaccessibleMemOnly:
- case Attribute::InaccessibleMemOrArgMemOnly:
- case Attribute::JumpTable:
- case Attribute::Naked:
- case Attribute::Nest:
- case Attribute::NoAlias:
- case Attribute::NoBuiltin:
- case Attribute::NoCapture:
- case Attribute::NoReturn:
- case Attribute::NoSync:
- case Attribute::None:
- case Attribute::NonNull:
- case Attribute::ReadNone:
- case Attribute::ReadOnly:
- case Attribute::Returned:
- case Attribute::ReturnsTwice:
- case Attribute::SExt:
- case Attribute::Speculatable:
- case Attribute::StackAlignment:
- case Attribute::StructRet:
- case Attribute::SwiftError:
- case Attribute::SwiftSelf:
- case Attribute::WillReturn:
- case Attribute::WriteOnly:
- case Attribute::ZExt:
- case Attribute::ImmArg:
- case Attribute::EndAttrKinds:
- continue;
- // Those attributes should be safe to propagate to the extracted function.
- case Attribute::AlwaysInline:
- case Attribute::Cold:
- case Attribute::NoRecurse:
- case Attribute::InlineHint:
- case Attribute::MinSize:
- case Attribute::NoDuplicate:
- case Attribute::NoFree:
- case Attribute::NoImplicitFloat:
- case Attribute::NoInline:
- case Attribute::NonLazyBind:
- case Attribute::NoRedZone:
- case Attribute::NoUnwind:
- case Attribute::OptForFuzzing:
- case Attribute::OptimizeNone:
- case Attribute::OptimizeForSize:
- case Attribute::SafeStack:
- case Attribute::ShadowCallStack:
- case Attribute::SanitizeAddress:
- case Attribute::SanitizeMemory:
- case Attribute::SanitizeThread:
- case Attribute::SanitizeHWAddress:
- case Attribute::SanitizeMemTag:
- case Attribute::SpeculativeLoadHardening:
- case Attribute::StackProtect:
- case Attribute::StackProtectReq:
- case Attribute::StackProtectStrong:
- case Attribute::StrictFP:
- case Attribute::UWTable:
- case Attribute::NoCfCheck:
- break;
- }
-
- newFunction->addFnAttr(Attr);
- }
- newFunction->getBasicBlockList().push_back(newRootNode);
-
- // Create an iterator to name all of the arguments we inserted.
- Function::arg_iterator AI = newFunction->arg_begin();
-
- // Rewrite all users of the inputs in the extracted region to use the
- // arguments (or appropriate addressing into struct) instead.
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
- Value *RewriteVal;
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
- Instruction *TI = newFunction->begin()->getTerminator();
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
- RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
- "loadgep_" + inputs[i]->getName(), TI);
- } else
- RewriteVal = &*AI++;
-
- std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
- for (User *use : Users)
- if (Instruction *inst = dyn_cast<Instruction>(use))
- if (Blocks.count(inst->getParent()))
- inst->replaceUsesOfWith(inputs[i], RewriteVal);
- }
-
- // Set names for input and output arguments.
- if (!AggregateArgs) {
- AI = newFunction->arg_begin();
- for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
- AI->setName(inputs[i]->getName());
- for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
- AI->setName(outputs[i]->getName()+".out");
- }
-
- // Rewrite branches to basic blocks outside of the loop to new dummy blocks
- // within the new function. This must be done before we lose track of which
- // blocks were originally in the code region.
- std::vector<User *> Users(header->user_begin(), header->user_end());
- for (unsigned i = 0, e = Users.size(); i != e; ++i)
- // The BasicBlock which contains the branch is not in the region
- // modify the branch target to a new block
- if (Instruction *I = dyn_cast<Instruction>(Users[i]))
- if (I->isTerminator() && !Blocks.count(I->getParent()) &&
- I->getParent()->getParent() == oldFunction)
- I->replaceUsesOfWith(header, newHeader);
-
- return newFunction;
-}
-
-/// Erase lifetime.start markers which reference inputs to the extraction
-/// region, and insert the referenced memory into \p LifetimesStart.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
- const SetVector<Value *> &SunkAllocas,
- SetVector<Value *> &LifetimesStart) {
- for (BasicBlock *BB : Blocks) {
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- auto *II = dyn_cast<IntrinsicInst>(&*It);
- ++It;
- if (!II || !II->isLifetimeStartOrEnd())
- continue;
-
- // Get the memory operand of the lifetime marker. If the underlying
- // object is a sunk alloca, or is otherwise defined in the extraction
- // region, the lifetime marker must not be erased.
- Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
- if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
- continue;
-
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- LifetimesStart.insert(Mem);
- II->eraseFromParent();
- }
- }
-}
-
-/// Insert lifetime start/end markers surrounding the call to the new function
-/// for objects defined in the caller.
-static void insertLifetimeMarkersSurroundingCall(
- Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
- CallInst *TheCall) {
- LLVMContext &Ctx = M->getContext();
- auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
- auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
- Instruction *Term = TheCall->getParent()->getTerminator();
-
- // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
- // needed to satisfy this requirement so they may be reused.
- DenseMap<Value *, Value *> Bitcasts;
-
- // Emit lifetime markers for the pointers given in \p Objects. Insert the
- // markers before the call if \p InsertBefore, and after the call otherwise.
- auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
- bool InsertBefore) {
- for (Value *Mem : Objects) {
- assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
- TheCall->getFunction()) &&
- "Input memory not defined in original function");
- Value *&MemAsI8Ptr = Bitcasts[Mem];
- if (!MemAsI8Ptr) {
- if (Mem->getType() == Int8PtrTy)
- MemAsI8Ptr = Mem;
- else
- MemAsI8Ptr =
- CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
- }
-
- auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
- if (InsertBefore)
- Marker->insertBefore(TheCall);
- else
- Marker->insertBefore(Term);
- }
- };
-
- if (!LifetimesStart.empty()) {
- auto StartFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
- insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
- }
-
- if (!LifetimesEnd.empty()) {
- auto EndFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
- insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
- }
-}
-
-/// emitCallAndSwitchStatement - This method sets up the caller side by adding
-/// the call instruction, splitting any PHI nodes in the header block as
-/// necessary.
-CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
- BasicBlock *codeReplacer,
- ValueSet &inputs,
- ValueSet &outputs) {
- // Emit a call to the new function, passing in: *pointer to struct (if
- // aggregating parameters), or plan inputs and allocated memory for outputs
- std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
-
- Module *M = newFunction->getParent();
- LLVMContext &Context = M->getContext();
- const DataLayout &DL = M->getDataLayout();
- CallInst *call = nullptr;
-
- // Add inputs as params, or to be filled into the struct
- unsigned ArgNo = 0;
- SmallVector<unsigned, 1> SwiftErrorArgs;
- for (Value *input : inputs) {
- if (AggregateArgs)
- StructValues.push_back(input);
- else {
- params.push_back(input);
- if (input->isSwiftError())
- SwiftErrorArgs.push_back(ArgNo);
- }
- ++ArgNo;
- }
-
- // Create allocas for the outputs
- for (Value *output : outputs) {
- if (AggregateArgs) {
- StructValues.push_back(output);
- } else {
- AllocaInst *alloca =
- new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
- nullptr, output->getName() + ".loc",
- &codeReplacer->getParent()->front().front());
- ReloadOutputs.push_back(alloca);
- params.push_back(alloca);
- }
- }
-
- StructType *StructArgTy = nullptr;
- AllocaInst *Struct = nullptr;
- if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- std::vector<Type *> ArgTypes;
- for (ValueSet::iterator v = StructValues.begin(),
- ve = StructValues.end(); v != ve; ++v)
- ArgTypes.push_back((*v)->getType());
-
- // Allocate a struct at the beginning of this function
- StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- "structArg",
- &codeReplacer->getParent()->front().front());
- params.push_back(Struct);
-
- for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
- codeReplacer->getInstList().push_back(GEP);
- StoreInst *SI = new StoreInst(StructValues[i], GEP);
- codeReplacer->getInstList().push_back(SI);
- }
- }
-
- // Emit the call to the function
- call = CallInst::Create(newFunction, params,
- NumExitBlocks > 1 ? "targetBlock" : "");
- // Add debug location to the new call, if the original function has debug
- // info. In that case, the terminator of the entry block of the extracted
- // function contains the first debug location of the extracted function,
- // set in extractCodeRegion.
- if (codeReplacer->getParent()->getSubprogram()) {
- if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
- call->setDebugLoc(DL);
- }
- codeReplacer->getInstList().push_back(call);
-
- // Set swifterror parameter attributes.
- for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
- call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
- newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
- }
-
- Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
- unsigned FirstOut = inputs.size();
- if (!AggregateArgs)
- std::advance(OutputArgBegin, inputs.size());
-
- // Reload the outputs passed in by reference.
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
- Value *Output = nullptr;
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
- codeReplacer->getInstList().push_back(GEP);
- Output = GEP;
- } else {
- Output = ReloadOutputs[i];
- }
- LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
- outputs[i]->getName() + ".reload");
- Reloads.push_back(load);
- codeReplacer->getInstList().push_back(load);
- std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
- for (unsigned u = 0, e = Users.size(); u != e; ++u) {
- Instruction *inst = cast<Instruction>(Users[u]);
- if (!Blocks.count(inst->getParent()))
- inst->replaceUsesOfWith(outputs[i], load);
- }
- }
-
- // Now we can emit a switch statement using the call as a value.
- SwitchInst *TheSwitch =
- SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
- codeReplacer, 0, codeReplacer);
-
- // Since there may be multiple exits from the original region, make the new
- // function return an unsigned, switch on that number. This loop iterates
- // over all of the blocks in the extracted region, updating any terminator
- // instructions in the to-be-extracted region that branch to blocks that are
- // not in the region to be extracted.
- std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
-
- unsigned switchVal = 0;
- for (BasicBlock *Block : Blocks) {
- Instruction *TI = Block->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (!Blocks.count(TI->getSuccessor(i))) {
- BasicBlock *OldTarget = TI->getSuccessor(i);
- // add a new basic block which returns the appropriate value
- BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
- if (!NewTarget) {
- // If we don't already have an exit stub for this non-extracted
- // destination, create one now!
- NewTarget = BasicBlock::Create(Context,
- OldTarget->getName() + ".exitStub",
- newFunction);
- unsigned SuccNum = switchVal++;
-
- Value *brVal = nullptr;
- switch (NumExitBlocks) {
- case 0:
- case 1: break; // No value needed.
- case 2: // Conditional branch, return a bool
- brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
- break;
- default:
- brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
- break;
- }
-
- ReturnInst::Create(Context, brVal, NewTarget);
-
- // Update the switch instruction.
- TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
- SuccNum),
- OldTarget);
- }
-
- // rewrite the original branch instruction with this new target
- TI->setSuccessor(i, NewTarget);
- }
- }
-
- // Store the arguments right after the definition of output value.
- // This should be proceeded after creating exit stubs to be ensure that invoke
- // result restore will be placed in the outlined function.
- Function::arg_iterator OAI = OutputArgBegin;
- for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
- auto *OutI = dyn_cast<Instruction>(outputs[i]);
- if (!OutI)
- continue;
-
- // Find proper insertion point.
- BasicBlock::iterator InsertPt;
- // In case OutI is an invoke, we insert the store at the beginning in the
- // 'normal destination' BB. Otherwise we insert the store right after OutI.
- if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
- InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
- else if (auto *Phi = dyn_cast<PHINode>(OutI))
- InsertPt = Phi->getParent()->getFirstInsertionPt();
- else
- InsertPt = std::next(OutI->getIterator());
-
- Instruction *InsertBefore = &*InsertPt;
- assert((InsertBefore->getFunction() == newFunction ||
- Blocks.count(InsertBefore->getParent())) &&
- "InsertPt should be in new function");
- assert(OAI != newFunction->arg_end() &&
- "Number of output arguments should match "
- "the amount of defined values");
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
- InsertBefore);
- new StoreInst(outputs[i], GEP, InsertBefore);
- // Since there should be only one struct argument aggregating
- // all the output values, we shouldn't increment OAI, which always
- // points to the struct argument, in this case.
- } else {
- new StoreInst(outputs[i], &*OAI, InsertBefore);
- ++OAI;
- }
- }
-
- // Now that we've done the deed, simplify the switch instruction.
- Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
- switch (NumExitBlocks) {
- case 0:
- // There are no successors (the block containing the switch itself), which
- // means that previously this was the last part of the function, and hence
- // this should be rewritten as a `ret'
-
- // Check if the function should return a value
- if (OldFnRetTy->isVoidTy()) {
- ReturnInst::Create(Context, nullptr, TheSwitch); // Return void
- } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
- // return what we have
- ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
- } else {
- // Otherwise we must have code extracted an unwind or something, just
- // return whatever we want.
- ReturnInst::Create(Context,
- Constant::getNullValue(OldFnRetTy), TheSwitch);
- }
-
- TheSwitch->eraseFromParent();
- break;
- case 1:
- // Only a single destination, change the switch into an unconditional
- // branch.
- BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
- TheSwitch->eraseFromParent();
- break;
- case 2:
- BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
- call, TheSwitch);
- TheSwitch->eraseFromParent();
- break;
- default:
- // Otherwise, make the default destination of the switch instruction be one
- // of the other successors.
- TheSwitch->setCondition(call);
- TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
- // Remove redundant case
- TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
- break;
- }
-
- // Insert lifetime markers around the reloads of any output values. The
- // allocas output values are stored in are only in-use in the codeRepl block.
- insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
-
- return call;
-}
-
-void CodeExtractor::moveCodeToFunction(Function *newFunction) {
- Function *oldFunc = (*Blocks.begin())->getParent();
- Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
- Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
-
- for (BasicBlock *Block : Blocks) {
- // Delete the basic block from the old function, and the list of blocks
- oldBlocks.remove(Block);
-
- // Insert this basic block into the new function
- newBlocks.push_back(Block);
-
- // Remove @llvm.assume calls that were moved to the new function from the
- // old function's assumption cache.
- if (AC)
- for (auto &I : *Block)
- if (match(&I, m_Intrinsic<Intrinsic::assume>()))
- AC->unregisterAssumption(cast<CallInst>(&I));
- }
-}
-
-void CodeExtractor::calculateNewCallTerminatorWeights(
- BasicBlock *CodeReplacer,
- DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
- BranchProbabilityInfo *BPI) {
- using Distribution = BlockFrequencyInfoImplBase::Distribution;
- using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
-
- // Update the branch weights for the exit block.
- Instruction *TI = CodeReplacer->getTerminator();
- SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
-
- // Block Frequency distribution with dummy node.
- Distribution BranchDist;
-
- // Add each of the frequencies of the successors.
- for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
- BlockNode ExitNode(i);
- uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
- if (ExitFreq != 0)
- BranchDist.addExit(ExitNode, ExitFreq);
- else
- BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero());
- }
-
- // Check for no total weight.
- if (BranchDist.Total == 0)
- return;
-
- // Normalize the distribution so that they can fit in unsigned.
- BranchDist.normalize();
-
- // Create normalized branch weights and set the metadata.
- for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
- const auto &Weight = BranchDist.Weights[I];
-
- // Get the weight and update the current BFI.
- BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
- BranchProbability BP(Weight.Amount, BranchDist.Total);
- BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP);
- }
- TI->setMetadata(
- LLVMContext::MD_prof,
- MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
-}
-
-Function *CodeExtractor::extractCodeRegion() {
- if (!isEligible())
- return nullptr;
-
- // Assumption: this is a single-entry code region, and the header is the first
- // block in the region.
- BasicBlock *header = *Blocks.begin();
- Function *oldFunction = header->getParent();
-
- // For functions with varargs, check that varargs handling is only done in the
- // outlined function, i.e vastart and vaend are only used in outlined blocks.
- if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
- auto containsVarArgIntrinsic = [](Instruction &I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (const Function *F = CI->getCalledFunction())
- return F->getIntrinsicID() == Intrinsic::vastart ||
- F->getIntrinsicID() == Intrinsic::vaend;
- return false;
- };
-
- for (auto &BB : *oldFunction) {
- if (Blocks.count(&BB))
- continue;
- if (llvm::any_of(BB, containsVarArgIntrinsic))
- return nullptr;
- }
- }
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
- BasicBlock *CommonExit = nullptr;
-
- // Calculate the entry frequency of the new function before we change the root
- // block.
- BlockFrequency EntryFreq;
- if (BFI) {
- assert(BPI && "Both BPI and BFI are required to preserve profile info");
- for (BasicBlock *Pred : predecessors(header)) {
- if (Blocks.count(Pred))
- continue;
- EntryFreq +=
- BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
- }
- }
-
- // If we have any return instructions in the region, split those blocks so
- // that the return is not in the region.
- splitReturnBlocks();
-
- // Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
- DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
- SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
- // Update the branch weight for this successor.
- if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
- }
- ExitBlocks.insert(*SI);
- }
- }
- }
- NumExitBlocks = ExitBlocks.size();
-
- // If we have to split PHI nodes of the entry or exit blocks, do so now.
- severSplitPHINodesOfEntry(header);
- severSplitPHINodesOfExits(ExitBlocks);
-
- // This takes place of the original loop
- BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
- "codeRepl", oldFunction,
- header);
-
- // The new function needs a root node because other nodes can branch to the
- // head of the region, but the entry node of a function cannot have preds.
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
- "newFuncRoot");
- auto *BranchI = BranchInst::Create(header);
- // If the original function has debug info, we have to add a debug location
- // to the new branch instruction from the artificial entry block.
- // We use the debug location of the first instruction in the extracted
- // blocks, as there is no other equivalent line in the source code.
- if (oldFunction->getSubprogram()) {
- any_of(Blocks, [&BranchI](const BasicBlock *BB) {
- return any_of(*BB, [&BranchI](const Instruction &I) {
- if (!I.getDebugLoc())
- return false;
- BranchI->setDebugLoc(I.getDebugLoc());
- return true;
- });
- });
- }
- newFuncRoot->getInstList().push_back(BranchI);
-
- findAllocas(SinkingCands, HoistingCands, CommonExit);
- assert(HoistingCands.empty() || CommonExit);
-
- // Find inputs to, outputs from the code region.
- findInputsOutputs(inputs, outputs, SinkingCands);
-
- // Now sink all instructions which only have non-phi uses inside the region.
- // Group the allocas at the start of the block, so that any bitcast uses of
- // the allocas are well-defined.
- AllocaInst *FirstSunkAlloca = nullptr;
- for (auto *II : SinkingCands) {
- if (auto *AI = dyn_cast<AllocaInst>(II)) {
- AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
- if (!FirstSunkAlloca)
- FirstSunkAlloca = AI;
- }
- }
- assert((SinkingCands.empty() || FirstSunkAlloca) &&
- "Did not expect a sink candidate without any allocas");
- for (auto *II : SinkingCands) {
- if (!isa<AllocaInst>(II)) {
- cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
- }
- }
-
- if (!HoistingCands.empty()) {
- auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
- Instruction *TI = HoistToBlock->getTerminator();
- for (auto *II : HoistingCands)
- cast<Instruction>(II)->moveBefore(TI);
- }
-
- // Collect objects which are inputs to the extraction region and also
- // referenced by lifetime start markers within it. The effects of these
- // markers must be replicated in the calling function to prevent the stack
- // coloring pass from merging slots which store input objects.
- ValueSet LifetimesStart;
- eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
-
- // Construct new function based on inputs/outputs & add allocas for all defs.
- Function *newFunction =
- constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
- oldFunction, oldFunction->getParent());
-
- // Update the entry count of the function.
- if (BFI) {
- auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
- if (Count.hasValue())
- newFunction->setEntryCount(
- ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
- BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
- }
-
- CallInst *TheCall =
- emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
-
- moveCodeToFunction(newFunction);
-
- // Replicate the effects of any lifetime start/end markers which referenced
- // input objects in the extraction region by placing markers around the call.
- insertLifetimeMarkersSurroundingCall(
- oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
-
- // Propagate personality info to the new function if there is one.
- if (oldFunction->hasPersonalityFn())
- newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
-
- // Update the branch weights for the exit block.
- if (BFI && NumExitBlocks > 1)
- calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
-
- // Loop over all of the PHI nodes in the header and exit blocks, and change
- // any references to the old incoming edge to be the new incoming edge.
- for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!Blocks.count(PN->getIncomingBlock(i)))
- PN->setIncomingBlock(i, newFuncRoot);
- }
-
- for (BasicBlock *ExitBB : ExitBlocks)
- for (PHINode &PN : ExitBB->phis()) {
- Value *IncomingCodeReplacerVal = nullptr;
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- // Ignore incoming values from outside of the extracted region.
- if (!Blocks.count(PN.getIncomingBlock(i)))
- continue;
-
- // Ensure that there is only one incoming value from codeReplacer.
- if (!IncomingCodeReplacerVal) {
- PN.setIncomingBlock(i, codeReplacer);
- IncomingCodeReplacerVal = PN.getIncomingValue(i);
- } else
- assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
- "PHI has two incompatbile incoming values from codeRepl");
- }
- }
-
- // Erase debug info intrinsics. Variable updates within the new function are
- // invisible to debuggers. This could be improved by defining a DISubprogram
- // for the new function.
- for (BasicBlock &BB : *newFunction) {
- auto BlockIt = BB.begin();
- // Remove debug info intrinsics from the new function.
- while (BlockIt != BB.end()) {
- Instruction *Inst = &*BlockIt;
- ++BlockIt;
- if (isa<DbgInfoIntrinsic>(Inst))
- Inst->eraseFromParent();
- }
- // Remove debug info intrinsics which refer to values in the new function
- // from the old function.
- SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
- for (Instruction &I : BB)
- findDbgUsers(DbgUsers, &I);
- for (DbgVariableIntrinsic *DVI : DbgUsers)
- DVI->eraseFromParent();
- }
-
- // Mark the new function `noreturn` if applicable. Terminators which resume
- // exception propagation are treated as returning instructions. This is to
- // avoid inserting traps after calls to outlined functions which unwind.
- bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
- const Instruction *Term = BB.getTerminator();
- return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
- });
- if (doesNotReturn)
- newFunction->setDoesNotReturn();
-
- LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
- newFunction->dump();
- report_fatal_error("verification of newFunction failed!");
- });
- LLVM_DEBUG(if (verifyFunction(*oldFunction))
- report_fatal_error("verification of oldFunction failed!"));
- return newFunction;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
deleted file mode 100644
index 069a86f6ab33..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines functions that are used to process llvm.global_ctors.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CtorUtils.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "ctor_utils"
-
-using namespace llvm;
-
-/// Given a specified llvm.global_ctors list, remove the listed elements.
-static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
- // Filter out the initializer elements to remove.
- ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
- SmallVector<Constant *, 10> CAList;
- for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
- if (!CtorsToRemove.test(I))
- CAList.push_back(OldCA->getOperand(I));
-
- // Create the new array initializer.
- ArrayType *ATy =
- ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
- Constant *CA = ConstantArray::get(ATy, CAList);
-
- // If we didn't change the number of elements, don't create a new GV.
- if (CA->getType() == OldCA->getType()) {
- GCL->setInitializer(CA);
- return;
- }
-
- // Create the new global and insert it next to the existing list.
- GlobalVariable *NGV =
- new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
- CA, "", GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
- NGV->takeName(GCL);
-
- // Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
- GCL->eraseFromParent();
-}
-
-/// Given a llvm.global_ctors list that we can understand,
-/// return a list of the functions and null terminator as a vector.
-static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
- if (GV->getInitializer()->isNullValue())
- return std::vector<Function *>();
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- std::vector<Function *> Result;
- Result.reserve(CA->getNumOperands());
- for (auto &V : CA->operands()) {
- ConstantStruct *CS = cast<ConstantStruct>(V);
- Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
- }
- return Result;
-}
-
-/// Find the llvm.global_ctors list, verifying that all initializers have an
-/// init priority of 65535.
-static GlobalVariable *findGlobalCtors(Module &M) {
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return nullptr;
-
- // Verify that the initializer is simple enough for us to handle. We are
- // only allowed to optimize the initializer if it is unique.
- if (!GV->hasUniqueInitializer())
- return nullptr;
-
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
- return GV;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-
- for (auto &V : CA->operands()) {
- if (isa<ConstantAggregateZero>(V))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(V);
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
-
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return nullptr;
-
- // Init priority must be standard.
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
- if (CI->getZExtValue() != 65535)
- return nullptr;
- }
-
- return GV;
-}
-
-/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
-/// entries for which it returns true. Return true if anything changed.
-bool llvm::optimizeGlobalCtorsList(
- Module &M, function_ref<bool(Function *)> ShouldRemove) {
- GlobalVariable *GlobalCtors = findGlobalCtors(M);
- if (!GlobalCtors)
- return false;
-
- std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
- if (Ctors.empty())
- return false;
-
- bool MadeChange = false;
-
- // Loop over global ctors, optimizing them when we can.
- unsigned NumCtors = Ctors.size();
- BitVector CtorsToRemove(NumCtors);
- for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
- Function *F = Ctors[i];
- // Found a null terminator in the middle of the list, prune off the rest of
- // the list.
- if (!F)
- continue;
-
- LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
-
- // We cannot simplify external ctor functions.
- if (F->empty())
- continue;
-
- // If we can evaluate the ctor at compile time, do.
- if (ShouldRemove(F)) {
- Ctors[i] = nullptr;
- CtorsToRemove.set(i);
- NumCtors--;
- MadeChange = true;
- continue;
- }
- }
-
- if (!MadeChange)
- return false;
-
- removeGlobalCtors(GlobalCtors, CtorsToRemove);
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
deleted file mode 100644
index 5f53d794fe8a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-/// DemoteRegToStack - This function takes a virtual register computed by an
-/// Instruction and replaces it with a slot in the stack frame, allocated via
-/// alloca. This allows the CFG to be changed around without fear of
-/// invalidating the SSA information for the value. It returns the pointer to
-/// the alloca inserted to create a stack slot for I.
-AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
- Instruction *AllocaPoint) {
- if (I.use_empty()) {
- I.eraseFromParent();
- return nullptr;
- }
-
- Function *F = I.getParent()->getParent();
- const DataLayout &DL = F->getParent()->getDataLayout();
-
- // Create a stack slot to hold the value.
- AllocaInst *Slot;
- if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName()+".reg2mem", AllocaPoint);
- } else {
- Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName() + ".reg2mem", &F->getEntryBlock().front());
- }
-
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical. Therefore, split the critical edge and create a basic block
- // into which the store can be inserted.
- if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- if (!II->getNormalDest()->getSinglePredecessor()) {
- unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
- assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
- BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
- assert(BB && "Unable to split critical edge.");
- (void)BB;
- }
- }
-
- // Change all of the users of the instruction to read from the stack slot.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.user_back());
- if (PHINode *PN = dyn_cast<PHINode>(U)) {
- // If this is a PHI node, we can't insert a load of the value before the
- // use. Instead insert the load in the predecessor block corresponding
- // to the incoming value.
- //
- // Note that if there are multiple edges from a basic block to this PHI
- // node that we cannot have multiple loads. The problem is that the
- // resulting PHI node will have multiple values (from each load) coming in
- // from the same block, which is illegal SSA form. For this reason, we
- // keep track of and reuse loads we insert.
- DenseMap<BasicBlock*, Value*> Loads;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == &I) {
- Value *&V = Loads[PN->getIncomingBlock(i)];
- if (!V) {
- // Insert the load into the predecessor block
- V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
- VolatileLoads,
- PN->getIncomingBlock(i)->getTerminator());
- }
- PN->setIncomingValue(i, V);
- }
-
- } else {
- // If this is a normal instruction, just insert a load.
- Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
- VolatileLoads, U);
- U->replaceUsesOfWith(&I, V);
- }
- }
-
- // Insert stores of the computed value into the stack slot. We have to be
- // careful if I is an invoke instruction, because we can't insert the store
- // AFTER the terminator instruction.
- BasicBlock::iterator InsertPt;
- if (!I.isTerminator()) {
- InsertPt = ++I.getIterator();
- for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
- /* empty */; // Don't insert before PHI nodes or landingpad instrs.
- } else {
- InvokeInst &II = cast<InvokeInst>(I);
- InsertPt = II.getNormalDest()->getFirstInsertionPt();
- }
-
- new StoreInst(&I, Slot, &*InsertPt);
- return Slot;
-}
-
-/// DemotePHIToStack - This function takes a virtual register computed by a PHI
-/// node and replaces it with a slot in the stack frame allocated via alloca.
-/// The PHI node is deleted. It returns the pointer to the alloca inserted.
-AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
- if (P->use_empty()) {
- P->eraseFromParent();
- return nullptr;
- }
-
- const DataLayout &DL = P->getModule()->getDataLayout();
-
- // Create a stack slot to hold the value.
- AllocaInst *Slot;
- if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
- P->getName()+".reg2mem", AllocaPoint);
- } else {
- Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
- P->getName() + ".reg2mem",
- &F->getEntryBlock().front());
- }
-
- // Iterate over each operand inserting a store in each predecessor.
- for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
- assert(II->getParent() != P->getIncomingBlock(i) &&
- "Invoke edge not supported yet"); (void)II;
- }
- new StoreInst(P->getIncomingValue(i), Slot,
- P->getIncomingBlock(i)->getTerminator());
- }
-
- // Insert a load in place of the PHI and replace all uses.
- BasicBlock::iterator InsertPt = P->getIterator();
-
- for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
- /* empty */; // Don't insert before PHI nodes or landingpad instrs.
-
- Value *V =
- new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt);
- P->replaceAllUsesWith(V);
-
- // Delete PHI.
- P->eraseFromParent();
- return Slot;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
deleted file mode 100644
index 4aa40eeadda4..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-static void insertCall(Function &CurFn, StringRef Func,
- Instruction *InsertionPt, DebugLoc DL) {
- Module &M = *InsertionPt->getParent()->getParent()->getParent();
- LLVMContext &C = InsertionPt->getParent()->getContext();
-
- if (Func == "mcount" ||
- Func == ".mcount" ||
- Func == "\01__gnu_mcount_nc" ||
- Func == "\01_mcount" ||
- Func == "\01mcount" ||
- Func == "__mcount" ||
- Func == "_mcount" ||
- Func == "__cyg_profile_func_enter_bare") {
- FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
- CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
- Call->setDebugLoc(DL);
- return;
- }
-
- if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
- Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
-
- FunctionCallee Fn = M.getOrInsertFunction(
- Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
-
- Instruction *RetAddr = CallInst::Create(
- Intrinsic::getDeclaration(&M, Intrinsic::returnaddress),
- ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "",
- InsertionPt);
- RetAddr->setDebugLoc(DL);
-
- Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)),
- RetAddr};
-
- CallInst *Call =
- CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
- Call->setDebugLoc(DL);
- return;
- }
-
- // We only know how to call a fixed set of instrumentation functions, because
- // they all expect different arguments, etc.
- report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'");
-}
-
-static bool runOnFunction(Function &F, bool PostInlining) {
- StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined"
- : "instrument-function-entry";
-
- StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined"
- : "instrument-function-exit";
-
- StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString();
- StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString();
-
- bool Changed = false;
-
- // If the attribute is specified, insert instrumentation and then "consume"
- // the attribute so that it's not inserted again if the pass should happen to
- // run later for some reason.
-
- if (!EntryFunc.empty()) {
- DebugLoc DL;
- if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(SP->getScopeLine(), 0, SP);
-
- insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
- Changed = true;
- F.removeAttribute(AttributeList::FunctionIndex, EntryAttr);
- }
-
- if (!ExitFunc.empty()) {
- for (BasicBlock &BB : F) {
- Instruction *T = BB.getTerminator();
- if (!isa<ReturnInst>(T))
- continue;
-
- // If T is preceded by a musttail call, that's the real terminator.
- Instruction *Prev = T->getPrevNode();
- if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev))
- Prev = BCI->getPrevNode();
- if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) {
- if (CI->isMustTailCall())
- T = CI;
- }
-
- DebugLoc DL;
- if (DebugLoc TerminatorDL = T->getDebugLoc())
- DL = TerminatorDL;
- else if (auto SP = F.getSubprogram())
- DL = DebugLoc::get(0, 0, SP);
-
- insertCall(F, ExitFunc, T, DL);
- Changed = true;
- }
- F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
- }
-
- return Changed;
-}
-
-namespace {
-struct EntryExitInstrumenter : public FunctionPass {
- static char ID;
- EntryExitInstrumenter() : FunctionPass(ID) {
- initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); }
-};
-char EntryExitInstrumenter::ID = 0;
-
-struct PostInlineEntryExitInstrumenter : public FunctionPass {
- static char ID;
- PostInlineEntryExitInstrumenter() : FunctionPass(ID) {
- initializePostInlineEntryExitInstrumenterPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); }
-};
-char PostInlineEntryExitInstrumenter::ID = 0;
-}
-
-INITIALIZE_PASS(
- EntryExitInstrumenter, "ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
- false, false)
-INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() "
- "(post inlining)",
- false, false)
-
-FunctionPass *llvm::createEntryExitInstrumenterPass() {
- return new EntryExitInstrumenter();
-}
-
-FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() {
- return new PostInlineEntryExitInstrumenter();
-}
-
-PreservedAnalyses
-llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
- runOnFunction(F, PostInlining);
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
deleted file mode 100644
index 914babeb6829..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===- EscapeEnumerator.cpp -----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines a helper class that enumerates all possible exits from a function,
-// including exception handling.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/EscapeEnumerator.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Module.h"
-using namespace llvm;
-
-static FunctionCallee getDefaultPersonalityFn(Module *M) {
- LLVMContext &C = M->getContext();
- Triple T(M->getTargetTriple());
- EHPersonality Pers = getDefaultEHPersonality(T);
- return M->getOrInsertFunction(getEHPersonalityName(Pers),
- FunctionType::get(Type::getInt32Ty(C), true));
-}
-
-IRBuilder<> *EscapeEnumerator::Next() {
- if (Done)
- return nullptr;
-
- // Find all 'return', 'resume', and 'unwind' instructions.
- while (StateBB != StateE) {
- BasicBlock *CurBB = &*StateBB++;
-
- // Branches and invokes do not escape, only unwind, resume, and return
- // do.
- Instruction *TI = CurBB->getTerminator();
- if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
- continue;
-
- Builder.SetInsertPoint(TI);
- return &Builder;
- }
-
- Done = true;
-
- if (!HandleExceptions)
- return nullptr;
-
- if (F.doesNotThrow())
- return nullptr;
-
- // Find all 'call' instructions that may throw.
- SmallVector<Instruction *, 16> Calls;
- for (BasicBlock &BB : F)
- for (Instruction &II : BB)
- if (CallInst *CI = dyn_cast<CallInst>(&II))
- if (!CI->doesNotThrow())
- Calls.push_back(CI);
-
- if (Calls.empty())
- return nullptr;
-
- // Create a cleanup block.
- LLVMContext &C = F.getContext();
- BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
- if (!F.hasPersonalityFn()) {
- FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
- F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
- }
-
- if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
- report_fatal_error("Scoped EH not supported");
- }
-
- LandingPadInst *LPad =
- LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
- LPad->setCleanup(true);
- ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
-
- // Transform the 'call' instructions into 'invoke's branching to the
- // cleanup block. Go in reverse order to make prettier BB names.
- SmallVector<Value *, 16> Args;
- for (unsigned I = Calls.size(); I != 0;) {
- CallInst *CI = cast<CallInst>(Calls[--I]);
- changeToInvokeAndSplitBasicBlock(CI, CleanupBB);
- }
-
- Builder.SetInsertPoint(RI);
- return &Builder;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
deleted file mode 100644
index 0e203f4e075d..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ /dev/null
@@ -1,731 +0,0 @@
-//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Function evaluator for LLVM IR.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Evaluator.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <iterator>
-
-#define DEBUG_TYPE "evaluator"
-
-using namespace llvm;
-
-static inline bool
-isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL);
-
-/// Return true if the specified constant can be handled by the code generator.
-/// We don't want to generate something like:
-/// void *X = &X/42;
-/// because the code generator doesn't have a relocation that can handle that.
-///
-/// This function should be called if C was not found (but just got inserted)
-/// in SimpleConstants to avoid having to rescan the same constants all the
-/// time.
-static bool
-isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL) {
- // Simple global addresses are supported, do not allow dllimport or
- // thread-local globals.
- if (auto *GV = dyn_cast<GlobalValue>(C))
- return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
-
- // Simple integer, undef, constant aggregate zero, etc are all supported.
- if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
- return true;
-
- // Aggregate values are safe if all their elements are.
- if (isa<ConstantAggregate>(C)) {
- for (Value *Op : C->operands())
- if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
- return false;
- return true;
- }
-
- // We don't know exactly what relocations are allowed in constant expressions,
- // so we allow &global+constantoffset, which is safe and uniformly supported
- // across targets.
- ConstantExpr *CE = cast<ConstantExpr>(C);
- switch (CE->getOpcode()) {
- case Instruction::BitCast:
- // Bitcast is fine if the casted value is fine.
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- // int <=> ptr is fine if the int type is the same size as the
- // pointer type.
- if (DL.getTypeSizeInBits(CE->getType()) !=
- DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- // GEP is fine if it is simple + constant offset.
- case Instruction::GetElementPtr:
- for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
- if (!isa<ConstantInt>(CE->getOperand(i)))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- case Instruction::Add:
- // We allow simple+cst.
- if (!isa<ConstantInt>(CE->getOperand(1)))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
- }
- return false;
-}
-
-static inline bool
-isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL) {
- // If we already checked this constant, we win.
- if (!SimpleConstants.insert(C).second)
- return true;
- // Check the constant.
- return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
-}
-
-/// Return true if this constant is simple enough for us to understand. In
-/// particular, if it is a cast to anything other than from one pointer type to
-/// another pointer type, we punt. We basically just support direct accesses to
-/// globals and GEP's of globals. This should be kept up to date with
-/// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
- // Conservatively, avoid aggregate types. This is because we don't
- // want to worry about them partially overlapping other stores.
- if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
- return false;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/*_odr/linkonce linkage or external globals.
- return GV->hasUniqueInitializer();
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- // Handle a constantexpr gep.
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0)) &&
- cast<GEPOperator>(CE)->isInBounds()) {
- GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- if (!GV->hasUniqueInitializer())
- return false;
-
- // The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
- if (!CI || !CI->isZero()) return false;
-
- // The remaining indices must be compile-time known integers within the
- // notional bounds of the corresponding static array types.
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
-
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-
- // A constantexpr bitcast from a pointer to another pointer is a no-op,
- // and we know how to evaluate it by moving the bitcast from the pointer
- // operand to the value operand.
- } else if (CE->getOpcode() == Instruction::BitCast &&
- isa<GlobalVariable>(CE->getOperand(0))) {
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
- }
- }
-
- return false;
-}
-
-/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's
-/// type and walk down through the initial elements to obtain additional
-/// pointers to try. Returns the first non-null return value from Func, or
-/// nullptr if the type can't be introspected further.
-static Constant *
-evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- std::function<Constant *(Constant *)> Func) {
- Constant *Val;
- while (!(Val = Func(Ptr))) {
- // If Ty is a struct, we can convert the pointer to the struct
- // into a pointer to its first member.
- // FIXME: This could be extended to support arrays as well.
- Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
- if (!isa<StructType>(Ty))
- break;
-
- IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
- Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
- Constant *const IdxList[] = {IdxZero, IdxZero};
-
- Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
- Ptr = FoldedPtr;
- }
- return Val;
-}
-
-static Constant *getInitializer(Constant *C) {
- auto *GV = dyn_cast<GlobalVariable>(C);
- return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
-}
-
-/// Return the value that would be computed by a load from P after the stores
-/// reflected by 'memory' have been performed. If we can't decide, return null.
-Constant *Evaluator::ComputeLoadResult(Constant *P) {
- // If this memory location has been recently stored, use the stored value: it
- // is the most up-to-date.
- auto findMemLoc = [this](Constant *Ptr) {
- DenseMap<Constant *, Constant *>::const_iterator I =
- MutatedMemory.find(Ptr);
- return I != MutatedMemory.end() ? I->second : nullptr;
- };
-
- if (Constant *Val = findMemLoc(P))
- return Val;
-
- // Access it.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
- if (GV->hasDefinitiveInitializer())
- return GV->getInitializer();
- return nullptr;
- }
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) {
- switch (CE->getOpcode()) {
- // Handle a constantexpr getelementptr.
- case Instruction::GetElementPtr:
- if (auto *I = getInitializer(CE->getOperand(0)))
- return ConstantFoldLoadThroughGEPConstantExpr(I, CE);
- break;
- // Handle a constantexpr bitcast.
- case Instruction::BitCast:
- // We're evaluating a load through a pointer that was bitcast to a
- // different type. See if the "from" pointer has recently been stored.
- // If it hasn't, we may still be able to find a stored pointer by
- // introspecting the type.
- Constant *Val =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc);
- if (!Val)
- Val = getInitializer(CE->getOperand(0));
- if (Val)
- return ConstantFoldLoadThroughBitcast(
- Val, P->getType()->getPointerElementType(), DL);
- break;
- }
- }
-
- return nullptr; // don't know how to evaluate.
-}
-
-static Function *getFunction(Constant *C) {
- if (auto *Fn = dyn_cast<Function>(C))
- return Fn;
-
- if (auto *Alias = dyn_cast<GlobalAlias>(C))
- if (auto *Fn = dyn_cast<Function>(Alias->getAliasee()))
- return Fn;
- return nullptr;
-}
-
-Function *
-Evaluator::getCalleeWithFormalArgs(CallSite &CS,
- SmallVector<Constant *, 8> &Formals) {
- auto *V = CS.getCalledValue();
- if (auto *Fn = getFunction(getVal(V)))
- return getFormalParams(CS, Fn, Formals) ? Fn : nullptr;
-
- auto *CE = dyn_cast<ConstantExpr>(V);
- if (!CE || CE->getOpcode() != Instruction::BitCast ||
- !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals))
- return nullptr;
-
- return dyn_cast<Function>(
- ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
-}
-
-bool Evaluator::getFormalParams(CallSite &CS, Function *F,
- SmallVector<Constant *, 8> &Formals) {
- if (!F)
- return false;
-
- auto *FTy = F->getFunctionType();
- if (FTy->getNumParams() > CS.getNumArgOperands()) {
- LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
- return false;
- }
-
- auto ArgI = CS.arg_begin();
- for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE;
- ++ParI) {
- auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL);
- if (!ArgC) {
- LLVM_DEBUG(dbgs() << "Can not convert function argument.\n");
- return false;
- }
- Formals.push_back(ArgC);
- ++ArgI;
- }
- return true;
-}
-
-/// If call expression contains bitcast then we may need to cast
-/// evaluated return value to a type of the call expression.
-Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
- if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
- return RV;
-
- if (auto *FT =
- dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
- RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
- if (!RV)
- LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
- }
- return RV;
-}
-
-/// Evaluate all instructions in block BB, returning true if successful, false
-/// if we can't evaluate it. NewBB returns the next BB that control flows into,
-/// or null upon return.
-bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
- BasicBlock *&NextBB) {
- // This is the main evaluation loop.
- while (true) {
- Constant *InstResult = nullptr;
-
- LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
-
- if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) {
- LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
- return false; // no volatile/atomic accesses.
- }
- Constant *Ptr = getVal(SI->getOperand(1));
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
- LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
- Ptr = FoldedPtr;
- LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
- }
- if (!isSimpleEnoughPointerToCommit(Ptr)) {
- // If this is too complex for us to commit, reject it.
- LLVM_DEBUG(
- dbgs() << "Pointer is too complex for us to evaluate store.");
- return false;
- }
-
- Constant *Val = getVal(SI->getOperand(0));
-
- // If this might be too difficult for the backend to handle (e.g. the addr
- // of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
- LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. "
- << *Val << "\n");
- return false;
- }
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- LLVM_DEBUG(dbgs()
- << "Attempting to resolve bitcast on constant ptr.\n");
- // If we're evaluating a store through a bitcast, then we need
- // to pull the bitcast off the pointer type and push it onto the
- // stored value. In order to push the bitcast onto the stored value,
- // a bitcast from the pointer's element type to Val's type must be
- // legal. If it's not, we can try introspecting the type to find a
- // legal conversion.
-
- auto castValTy = [&](Constant *P) -> Constant * {
- Type *Ty = cast<PointerType>(P->getType())->getElementType();
- if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) {
- Ptr = P;
- return FV;
- }
- return nullptr;
- };
-
- Constant *NewVal =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy);
- if (!NewVal) {
- LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
- return false;
- }
-
- Val = NewVal;
- LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
- }
- }
-
- MutatedMemory[Ptr] = Val;
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
- InstResult = ConstantExpr::get(BO->getOpcode(),
- getVal(BO->getOperand(0)),
- getVal(BO->getOperand(1)));
- LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: "
- << *InstResult << "\n");
- } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
- InstResult = ConstantExpr::getCompare(CI->getPredicate(),
- getVal(CI->getOperand(0)),
- getVal(CI->getOperand(1)));
- LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
- << "\n");
- } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
- InstResult = ConstantExpr::getCast(CI->getOpcode(),
- getVal(CI->getOperand(0)),
- CI->getType());
- LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
- << "\n");
- } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
- getVal(SI->getOperand(1)),
- getVal(SI->getOperand(2)));
- LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
- << "\n");
- } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
- InstResult = ConstantExpr::getExtractValue(
- getVal(EVI->getAggregateOperand()), EVI->getIndices());
- LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: "
- << *InstResult << "\n");
- } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
- InstResult = ConstantExpr::getInsertValue(
- getVal(IVI->getAggregateOperand()),
- getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
- LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: "
- << *InstResult << "\n");
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
- Constant *P = getVal(GEP->getOperand(0));
- SmallVector<Constant*, 8> GEPOps;
- for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
- i != e; ++i)
- GEPOps.push_back(getVal(*i));
- InstResult =
- ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
- LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n");
- } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (!LI->isSimple()) {
- LLVM_DEBUG(
- dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
- return false; // no volatile/atomic accesses.
- }
-
- Constant *Ptr = getVal(LI->getOperand(0));
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
- Ptr = FoldedPtr;
- LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant "
- "folding: "
- << *Ptr << "\n");
- }
- InstResult = ComputeLoadResult(Ptr);
- if (!InstResult) {
- LLVM_DEBUG(
- dbgs() << "Failed to compute load result. Can not evaluate load."
- "\n");
- return false; // Could not evaluate load.
- }
-
- LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
- if (AI->isArrayAllocation()) {
- LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
- return false; // Cannot handle array allocs.
- }
- Type *Ty = AI->getAllocatedType();
- AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(
- Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty),
- AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal,
- AI->getType()->getPointerAddressSpace()));
- InstResult = AllocaTmps.back().get();
- LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
- } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(&*CurInst);
-
- // Debug info can safely be ignored here.
- if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
- LLVM_DEBUG(dbgs() << "Ignoring debug info.\n");
- ++CurInst;
- continue;
- }
-
- // Cannot handle inline asm.
- if (isa<InlineAsm>(CS.getCalledValue())) {
- LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
- return false;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
- if (MSI->isVolatile()) {
- LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset "
- << "intrinsic.\n");
- return false;
- }
- Constant *Ptr = getVal(MSI->getDest());
- Constant *Val = getVal(MSI->getValue());
- Constant *DestVal = ComputeLoadResult(getVal(Ptr));
- if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
- // This memset is a no-op.
- LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
- ++CurInst;
- continue;
- }
- }
-
- if (II->isLifetimeStartOrEnd()) {
- LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
- ++CurInst;
- continue;
- }
-
- if (II->getIntrinsicID() == Intrinsic::invariant_start) {
- // We don't insert an entry into Values, as it doesn't have a
- // meaningful return value.
- if (!II->use_empty()) {
- LLVM_DEBUG(dbgs()
- << "Found unused invariant_start. Can't evaluate.\n");
- return false;
- }
- ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
- Value *PtrArg = getVal(II->getArgOperand(1));
- Value *Ptr = PtrArg->stripPointerCasts();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
- Type *ElemTy = GV->getValueType();
- if (!Size->isMinusOne() &&
- Size->getValue().getLimitedValue() >=
- DL.getTypeStoreSize(ElemTy)) {
- Invariants.insert(GV);
- LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: "
- << *GV << "\n");
- } else {
- LLVM_DEBUG(dbgs()
- << "Found a global var, but can not treat it as an "
- "invariant.\n");
- }
- }
- // Continue even if we do nothing.
- ++CurInst;
- continue;
- } else if (II->getIntrinsicID() == Intrinsic::assume) {
- LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n");
- ++CurInst;
- continue;
- } else if (II->getIntrinsicID() == Intrinsic::sideeffect) {
- LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");
- ++CurInst;
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
- return false;
- }
-
- // Resolve function pointers.
- SmallVector<Constant *, 8> Formals;
- Function *Callee = getCalleeWithFormalArgs(CS, Formals);
- if (!Callee || Callee->isInterposable()) {
- LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
- return false; // Cannot resolve.
- }
-
- if (Callee->isDeclaration()) {
- // If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()),
- Callee, Formals, TLI)) {
- InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
- if (!InstResult)
- return false;
- LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
- << *InstResult << "\n");
- } else {
- LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
- return false;
- }
- } else {
- if (Callee->getFunctionType()->isVarArg()) {
- LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
- return false;
- }
-
- Constant *RetVal = nullptr;
- // Execute the call, if successful, use the return value.
- ValueStack.emplace_back();
- if (!EvaluateFunction(Callee, RetVal, Formals)) {
- LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
- return false;
- }
- ValueStack.pop_back();
- InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal);
- if (RetVal && !InstResult)
- return false;
-
- if (InstResult) {
- LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
- << *InstResult << "\n\n");
- } else {
- LLVM_DEBUG(dbgs()
- << "Successfully evaluated function. Result: 0\n\n");
- }
- }
- } else if (CurInst->isTerminator()) {
- LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
- if (BI->isUnconditional()) {
- NextBB = BI->getSuccessor(0);
- } else {
- ConstantInt *Cond =
- dyn_cast<ConstantInt>(getVal(BI->getCondition()));
- if (!Cond) return false; // Cannot determine.
-
- NextBB = BI->getSuccessor(!Cond->getZExtValue());
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
- ConstantInt *Val =
- dyn_cast<ConstantInt>(getVal(SI->getCondition()));
- if (!Val) return false; // Cannot determine.
- NextBB = SI->findCaseValue(Val)->getCaseSuccessor();
- } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
- Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
- if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
- NextBB = BA->getBasicBlock();
- else
- return false; // Cannot determine.
- } else if (isa<ReturnInst>(CurInst)) {
- NextBB = nullptr;
- } else {
- // invoke, unwind, resume, unreachable.
- LLVM_DEBUG(dbgs() << "Can not handle terminator.");
- return false; // Cannot handle this terminator.
- }
-
- // We succeeded at evaluating this block!
- LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n");
- return true;
- } else {
- // Did not know how to evaluate this!
- LLVM_DEBUG(
- dbgs() << "Failed to evaluate block due to unhandled instruction."
- "\n");
- return false;
- }
-
- if (!CurInst->use_empty()) {
- if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI))
- InstResult = FoldedInstResult;
-
- setVal(&*CurInst, InstResult);
- }
-
- // If we just processed an invoke, we finished evaluating the block.
- if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
- NextBB = II->getNormalDest();
- LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
- return true;
- }
-
- // Advance program counter.
- ++CurInst;
- }
-}
-
-/// Evaluate a call to function F, returning true if successful, false if we
-/// can't evaluate it. ActualArgs contains the formal arguments for the
-/// function.
-bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
- const SmallVectorImpl<Constant*> &ActualArgs) {
- // Check to see if this function is already executing (recursion). If so,
- // bail out. TODO: we might want to accept limited recursion.
- if (is_contained(CallStack, F))
- return false;
-
- CallStack.push_back(F);
-
- // Initialize arguments to the incoming values specified.
- unsigned ArgNo = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
- ++AI, ++ArgNo)
- setVal(&*AI, ActualArgs[ArgNo]);
-
- // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
- // we can only evaluate any one basic block at most once. This set keeps
- // track of what we have executed so we can detect recursive cases etc.
- SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
- // CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = &F->front();
-
- BasicBlock::iterator CurInst = CurBB->begin();
-
- while (true) {
- BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
- LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
-
- if (!EvaluateBlock(CurInst, NextBB))
- return false;
-
- if (!NextBB) {
- // Successfully running until there's no next block means that we found
- // the return. Fill it the return value and pop the call stack.
- ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
- if (RI->getNumOperands())
- RetVal = getVal(RI->getOperand(0));
- CallStack.pop_back();
- return true;
- }
-
- // Okay, we succeeded in evaluating this control flow. See if we have
- // executed the new block before. If so, we have a looping function,
- // which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NextBB).second)
- return false; // looped!
-
- // Okay, we have never been in this block before. Check to see if there
- // are any PHI nodes. If so, evaluate them with information about where
- // we came from.
- PHINode *PN = nullptr;
- for (CurInst = NextBB->begin();
- (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
- setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
-
- // Advance to the next block.
- CurBB = NextBB;
- }
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
deleted file mode 100644
index 0c52e6f3703b..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ /dev/null
@@ -1,491 +0,0 @@
-//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Reduce conditional branches in CFG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "flattencfg"
-
-namespace {
-
-class FlattenCFGOpt {
- AliasAnalysis *AA;
-
- /// Use parallel-and or parallel-or to generate conditions for
- /// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
-
- /// If \param BB is the merge block of an if-region, attempt to merge
- /// the if-region with an adjacent if-region upstream if two if-regions
- /// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
-
- /// Compare a pair of blocks: \p Block1 and \p Block2, which
- /// are from two if-regions whose entry blocks are \p Head1 and \p
- /// Head2. \returns true if \p Block1 and \p Block2 contain identical
- /// instructions, and have no memory reference alias with \p Head2.
- /// This is used as a legality check for merging if-regions.
- bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
- BasicBlock *Block1, BasicBlock *Block2);
-
-public:
- FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
-
- bool run(BasicBlock *BB);
-};
-
-} // end anonymous namespace
-
-/// If \param [in] BB has more than one predecessor that is a conditional
-/// branch, attempt to use parallel and/or for the branch condition. \returns
-/// true on success.
-///
-/// Before:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// br i1 %cmp1, label %if.then, label %lor.rhs
-///
-/// lor.rhs:
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// br i1 %cmp11, label %if.then, label %ifend
-///
-/// if.end: // the merge block
-/// ......
-///
-/// if.then: // has two predecessors, both of them contains conditional branch.
-/// ......
-/// br label %if.end;
-///
-/// After:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
-/// br i1 %cmp12, label %if.then, label %ifend
-///
-/// if.end:
-/// ......
-///
-/// if.then:
-/// ......
-/// br label %if.end;
-///
-/// Current implementation handles two cases.
-/// Case 1: \param BB is on the else-path.
-///
-/// BB1
-/// / |
-/// BB2 |
-/// / \ |
-/// BB3 \ | where, BB1, BB2 contain conditional branches.
-/// \ | / BB3 contains unconditional branch.
-/// \ | / BB4 corresponds to \param BB which is also the merge.
-/// BB => BB4
-///
-///
-/// Corresponding source code:
-///
-/// if (a == b && c == d)
-/// statement; // BB3
-///
-/// Case 2: \param BB BB is on the then-path.
-///
-/// BB1
-/// / |
-/// | BB2
-/// \ / | where BB1, BB2 contain conditional branches.
-/// BB => BB3 | BB3 contains unconditiona branch and corresponds
-/// \ / to \param BB. BB4 is the merge.
-/// BB4
-///
-/// Corresponding source code:
-///
-/// if (a == b || c == d)
-/// statement; // BB3
-///
-/// In both cases, \param BB is the common successor of conditional branches.
-/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
-/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
-/// as its predecessors.
-bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
- PHINode *PHI = dyn_cast<PHINode>(BB->begin());
- if (PHI)
- return false; // For simplicity, avoid cases containing PHI nodes.
-
- BasicBlock *LastCondBlock = nullptr;
- BasicBlock *FirstCondBlock = nullptr;
- BasicBlock *UnCondBlock = nullptr;
- int Idx = -1;
-
- // Check predecessors of \param BB.
- SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
-
- // All predecessors should terminate with a branch.
- if (!PBI)
- return false;
-
- BasicBlock *PP = Pred->getSinglePredecessor();
-
- if (PBI->isUnconditional()) {
- // Case 1: Pred (BB3) is an unconditional block, it should
- // have a single predecessor (BB2) that is also a predecessor
- // of \param BB (BB4) and should not have address-taken.
- // There should exist only one such unconditional
- // branch among the predecessors.
- if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
- Pred->hasAddressTaken())
- return false;
-
- UnCondBlock = Pred;
- continue;
- }
-
- // Only conditional branches are allowed beyond this point.
- assert(PBI->isConditional());
-
- // Condition's unique use should be the branch instruction.
- Value *PC = PBI->getCondition();
- if (!PC || !PC->hasOneUse())
- return false;
-
- if (PP && Preds.count(PP)) {
- // These are internal condition blocks to be merged from, e.g.,
- // BB2 in both cases.
- // Should not be address-taken.
- if (Pred->hasAddressTaken())
- return false;
-
- // Instructions in the internal condition blocks should be safe
- // to hoist up.
- for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
- BI != BE;) {
- Instruction *CI = &*BI++;
- if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
- return false;
- }
- } else {
- // This is the condition block to be merged into, e.g. BB1 in
- // both cases.
- if (FirstCondBlock)
- return false;
- FirstCondBlock = Pred;
- }
-
- // Find whether BB is uniformly on the true (or false) path
- // for all of its predecessors.
- BasicBlock *PS1 = PBI->getSuccessor(0);
- BasicBlock *PS2 = PBI->getSuccessor(1);
- BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
- int CIdx = (PS1 == BB) ? 0 : 1;
-
- if (Idx == -1)
- Idx = CIdx;
- else if (CIdx != Idx)
- return false;
-
- // PS is the successor which is not BB. Check successors to identify
- // the last conditional branch.
- if (Preds.count(PS) == 0) {
- // Case 2.
- LastCondBlock = Pred;
- } else {
- // Case 1
- BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
- if (BPS && BPS->isUnconditional()) {
- // Case 1: PS(BB3) should be an unconditional branch.
- LastCondBlock = Pred;
- }
- }
- }
-
- if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
- return false;
-
- Instruction *TBB = LastCondBlock->getTerminator();
- BasicBlock *PS1 = TBB->getSuccessor(0);
- BasicBlock *PS2 = TBB->getSuccessor(1);
- BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
- BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
-
- // If PS1 does not jump into PS2, but PS2 jumps into PS1,
- // attempt branch inversion.
- if (!PBI1 || !PBI1->isUnconditional() ||
- (PS1->getTerminator()->getSuccessor(0) != PS2)) {
- // Check whether PS2 jumps into PS1.
- if (!PBI2 || !PBI2->isUnconditional() ||
- (PS2->getTerminator()->getSuccessor(0) != PS1))
- return false;
-
- // Do branch inversion.
- BasicBlock *CurrBlock = LastCondBlock;
- bool EverChanged = false;
- for (; CurrBlock != FirstCondBlock;
- CurrBlock = CurrBlock->getSinglePredecessor()) {
- BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
- if (!CI)
- continue;
-
- CmpInst::Predicate Predicate = CI->getPredicate();
- // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
- if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
- CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
- BI->swapSuccessors();
- EverChanged = true;
- }
- }
- return EverChanged;
- }
-
- // PS1 must have a conditional branch.
- if (!PBI1 || !PBI1->isUnconditional())
- return false;
-
- // PS2 should not contain PHI node.
- PHI = dyn_cast<PHINode>(PS2->begin());
- if (PHI)
- return false;
-
- // Do the transformation.
- BasicBlock *CB;
- BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
- bool Iteration = true;
- IRBuilder<>::InsertPointGuard Guard(Builder);
- Value *PC = PBI->getCondition();
-
- do {
- CB = PBI->getSuccessor(1 - Idx);
- // Delete the conditional branch.
- FirstCondBlock->getInstList().pop_back();
- FirstCondBlock->getInstList()
- .splice(FirstCondBlock->end(), CB->getInstList());
- PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
- Value *CC = PBI->getCondition();
- // Merge conditions.
- Builder.SetInsertPoint(PBI);
- Value *NC;
- if (Idx == 0)
- // Case 2, use parallel or.
- NC = Builder.CreateOr(PC, CC);
- else
- // Case 1, use parallel and.
- NC = Builder.CreateAnd(PC, CC);
-
- PBI->replaceUsesOfWith(CC, NC);
- PC = NC;
- if (CB == LastCondBlock)
- Iteration = false;
- // Remove internal conditional branches.
- CB->dropAllReferences();
- // make CB unreachable and let downstream to delete the block.
- new UnreachableInst(CB->getContext(), CB);
- } while (Iteration);
-
- LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
- return true;
-}
-
-/// Compare blocks from two if-regions, where \param Head1 is the entry of the
-/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
-/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
-// in the 2nd if-region to compare. \returns true if \param Block1 and \param
-/// Block2 have identical instructions and do not have memory reference alias
-/// with \param Head2.
-bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
- BasicBlock *Block1,
- BasicBlock *Block2) {
- Instruction *PTI2 = Head2->getTerminator();
- Instruction *PBI2 = &Head2->front();
-
- bool eq1 = (Block1 == Head1);
- bool eq2 = (Block2 == Head2);
- if (eq1 || eq2) {
- // An empty then-path or else-path.
- return (eq1 == eq2);
- }
-
- // Check whether instructions in Block1 and Block2 are identical
- // and do not alias with instructions in Head2.
- BasicBlock::iterator iter1 = Block1->begin();
- BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
- BasicBlock::iterator iter2 = Block2->begin();
- BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
-
- while (true) {
- if (iter1 == end1) {
- if (iter2 != end2)
- return false;
- break;
- }
-
- if (!iter1->isIdenticalTo(&*iter2))
- return false;
-
- // Illegal to remove instructions with side effects except
- // non-volatile stores.
- if (iter1->mayHaveSideEffects()) {
- Instruction *CurI = &*iter1;
- StoreInst *SI = dyn_cast<StoreInst>(CurI);
- if (!SI || SI->isVolatile())
- return false;
- }
-
- // For simplicity and speed, data dependency check can be
- // avoided if read from memory doesn't exist.
- if (iter1->mayReadFromMemory())
- return false;
-
- if (iter1->mayWriteToMemory()) {
- for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
- if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
- // Check alias with Head2.
- if (!AA || AA->alias(&*iter1, &*BI))
- return false;
- }
- }
- }
- ++iter1;
- ++iter2;
- }
-
- return true;
-}
-
-/// Check whether \param BB is the merge block of a if-region. If yes, check
-/// whether there exists an adjacent if-region upstream, the two if-regions
-/// contain identical instructions and can be legally merged. \returns true if
-/// the two if-regions are merged.
-///
-/// From:
-/// if (a)
-/// statement;
-/// if (b)
-/// statement;
-///
-/// To:
-/// if (a || b)
-/// statement;
-bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
- BasicBlock *IfTrue2, *IfFalse2;
- Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
- Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
- if (!CInst2)
- return false;
-
- BasicBlock *SecondEntryBlock = CInst2->getParent();
- if (SecondEntryBlock->hasAddressTaken())
- return false;
-
- BasicBlock *IfTrue1, *IfFalse1;
- Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
- Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
- if (!CInst1)
- return false;
-
- BasicBlock *FirstEntryBlock = CInst1->getParent();
-
- // Either then-path or else-path should be empty.
- if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
- return false;
- if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
- return false;
-
- Instruction *PTI2 = SecondEntryBlock->getTerminator();
- Instruction *PBI2 = &SecondEntryBlock->front();
-
- if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
- IfTrue2))
- return false;
-
- if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
- IfFalse2))
- return false;
-
- // Check whether \param SecondEntryBlock has side-effect and is safe to
- // speculate.
- for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
- Instruction *CI = &*BI;
- if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
- !isSafeToSpeculativelyExecute(CI))
- return false;
- }
-
- // Merge \param SecondEntryBlock into \param FirstEntryBlock.
- FirstEntryBlock->getInstList().pop_back();
- FirstEntryBlock->getInstList()
- .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
- BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
- Value *CC = PBI->getCondition();
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Builder.SetInsertPoint(PBI);
- Value *NC = Builder.CreateOr(CInst1, CC);
- PBI->replaceUsesOfWith(CC, NC);
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
-
- // Remove IfTrue1
- if (IfTrue1 != FirstEntryBlock) {
- IfTrue1->dropAllReferences();
- IfTrue1->eraseFromParent();
- }
-
- // Remove IfFalse1
- if (IfFalse1 != FirstEntryBlock) {
- IfFalse1->dropAllReferences();
- IfFalse1->eraseFromParent();
- }
-
- // Remove \param SecondEntryBlock
- SecondEntryBlock->dropAllReferences();
- SecondEntryBlock->eraseFromParent();
- LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
- return true;
-}
-
-bool FlattenCFGOpt::run(BasicBlock *BB) {
- assert(BB && BB->getParent() && "Block not embedded in function!");
- assert(BB->getTerminator() && "Degenerate basic block encountered!");
-
- IRBuilder<> Builder(BB);
-
- if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
- return true;
- return false;
-}
-
-/// FlattenCFG - This function is used to flatten a CFG. For
-/// example, it uses parallel-and and parallel-or mode to collapse
-/// if-conditions and merge if-regions with identical statements.
-bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
- return FlattenCFGOpt(AA).run(BB);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
deleted file mode 100644
index a9b28754c8e9..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ /dev/null
@@ -1,948 +0,0 @@
-//===- FunctionComparator.h - Function Comparator -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the FunctionComparator and GlobalNumberState classes
-// which are used by the MergeFunctions pass for comparing functions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/FunctionComparator.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "functioncomparator"
-
-int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
- if (L < R) return -1;
- if (L > R) return 1;
- return 0;
-}
-
-int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
- if ((int)L < (int)R) return -1;
- if ((int)L > (int)R) return 1;
- return 0;
-}
-
-int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
- if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
- return Res;
- if (L.ugt(R)) return 1;
- if (R.ugt(L)) return -1;
- return 0;
-}
-
-int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
- // Floats are ordered first by semantics (i.e. float, double, half, etc.),
- // then by value interpreted as a bitstring (aka APInt).
- const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
- if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
- APFloat::semanticsPrecision(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
- APFloat::semanticsMaxExponent(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
- APFloat::semanticsMinExponent(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
- APFloat::semanticsSizeInBits(SR)))
- return Res;
- return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
-}
-
-int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
- // Prevent heavy comparison, compare sizes first.
- if (int Res = cmpNumbers(L.size(), R.size()))
- return Res;
-
- // Compare strings lexicographically only when it is necessary: only when
- // strings are equal in size.
- return L.compare(R);
-}
-
-int FunctionComparator::cmpAttrs(const AttributeList L,
- const AttributeList R) const {
- if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
- return Res;
-
- for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
- AttributeSet LAS = L.getAttributes(i);
- AttributeSet RAS = R.getAttributes(i);
- AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
- AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
- for (; LI != LE && RI != RE; ++LI, ++RI) {
- Attribute LA = *LI;
- Attribute RA = *RI;
- if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
- if (LA.getKindAsEnum() != RA.getKindAsEnum())
- return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());
-
- Type *TyL = LA.getValueAsType();
- Type *TyR = RA.getValueAsType();
- if (TyL && TyR)
- return cmpTypes(TyL, TyR);
-
- // Two pointers, at least one null, so the comparison result is
- // independent of the value of a real pointer.
- return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
- }
- if (LA < RA)
- return -1;
- if (RA < LA)
- return 1;
- }
- if (LI != LE)
- return 1;
- if (RI != RE)
- return -1;
- }
- return 0;
-}
-
-int FunctionComparator::cmpRangeMetadata(const MDNode *L,
- const MDNode *R) const {
- if (L == R)
- return 0;
- if (!L)
- return -1;
- if (!R)
- return 1;
- // Range metadata is a sequence of numbers. Make sure they are the same
- // sequence.
- // TODO: Note that as this is metadata, it is possible to drop and/or merge
- // this data when considering functions to merge. Thus this comparison would
- // return 0 (i.e. equivalent), but merging would become more complicated
- // because the ranges would need to be unioned. It is not likely that
- // functions differ ONLY in this metadata if they are actually the same
- // function semantically.
- if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
- return Res;
- for (size_t I = 0; I < L->getNumOperands(); ++I) {
- ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
- ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
- if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
- return Res;
- }
- return 0;
-}
-
-int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
- const Instruction *R) const {
- ImmutableCallSite LCS(L);
- ImmutableCallSite RCS(R);
-
- assert(LCS && RCS && "Must be calls or invokes!");
- assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
-
- if (int Res =
- cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
- return Res;
-
- for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
- auto OBL = LCS.getOperandBundleAt(i);
- auto OBR = RCS.getOperandBundleAt(i);
-
- if (int Res = OBL.getTagName().compare(OBR.getTagName()))
- return Res;
-
- if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
- return Res;
- }
-
- return 0;
-}
-
-/// Constants comparison:
-/// 1. Check whether type of L constant could be losslessly bitcasted to R
-/// type.
-/// 2. Compare constant contents.
-/// For more details see declaration comments.
-int FunctionComparator::cmpConstants(const Constant *L,
- const Constant *R) const {
- Type *TyL = L->getType();
- Type *TyR = R->getType();
-
- // Check whether types are bitcastable. This part is just re-factored
- // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
- // we also pack into result which type is "less" for us.
- int TypesRes = cmpTypes(TyL, TyR);
- if (TypesRes != 0) {
- // Types are different, but check whether we can bitcast them.
- if (!TyL->isFirstClassType()) {
- if (TyR->isFirstClassType())
- return -1;
- // Neither TyL nor TyR are values of first class type. Return the result
- // of comparing the types
- return TypesRes;
- }
- if (!TyR->isFirstClassType()) {
- if (TyL->isFirstClassType())
- return 1;
- return TypesRes;
- }
-
- // Vector -> Vector conversions are always lossless if the two vector types
- // have the same size, otherwise not.
- unsigned TyLWidth = 0;
- unsigned TyRWidth = 0;
-
- if (auto *VecTyL = dyn_cast<VectorType>(TyL))
- TyLWidth = VecTyL->getBitWidth();
- if (auto *VecTyR = dyn_cast<VectorType>(TyR))
- TyRWidth = VecTyR->getBitWidth();
-
- if (TyLWidth != TyRWidth)
- return cmpNumbers(TyLWidth, TyRWidth);
-
- // Zero bit-width means neither TyL nor TyR are vectors.
- if (!TyLWidth) {
- PointerType *PTyL = dyn_cast<PointerType>(TyL);
- PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (PTyL && PTyR) {
- unsigned AddrSpaceL = PTyL->getAddressSpace();
- unsigned AddrSpaceR = PTyR->getAddressSpace();
- if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
- return Res;
- }
- if (PTyL)
- return 1;
- if (PTyR)
- return -1;
-
- // TyL and TyR aren't vectors, nor pointers. We don't know how to
- // bitcast them.
- return TypesRes;
- }
- }
-
- // OK, types are bitcastable, now check constant contents.
-
- if (L->isNullValue() && R->isNullValue())
- return TypesRes;
- if (L->isNullValue() && !R->isNullValue())
- return 1;
- if (!L->isNullValue() && R->isNullValue())
- return -1;
-
- auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
- auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
- if (GlobalValueL && GlobalValueR) {
- return cmpGlobalValues(GlobalValueL, GlobalValueR);
- }
-
- if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
- return Res;
-
- if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
- const auto *SeqR = cast<ConstantDataSequential>(R);
- // This handles ConstantDataArray and ConstantDataVector. Note that we
- // compare the two raw data arrays, which might differ depending on the host
- // endianness. This isn't a problem though, because the endiness of a module
- // will affect the order of the constants, but this order is the same
- // for a given input module and host platform.
- return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
- }
-
- switch (L->getValueID()) {
- case Value::UndefValueVal:
- case Value::ConstantTokenNoneVal:
- return TypesRes;
- case Value::ConstantIntVal: {
- const APInt &LInt = cast<ConstantInt>(L)->getValue();
- const APInt &RInt = cast<ConstantInt>(R)->getValue();
- return cmpAPInts(LInt, RInt);
- }
- case Value::ConstantFPVal: {
- const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
- const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
- return cmpAPFloats(LAPF, RAPF);
- }
- case Value::ConstantArrayVal: {
- const ConstantArray *LA = cast<ConstantArray>(L);
- const ConstantArray *RA = cast<ConstantArray>(R);
- uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
- uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (uint64_t i = 0; i < NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
- cast<Constant>(RA->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantStructVal: {
- const ConstantStruct *LS = cast<ConstantStruct>(L);
- const ConstantStruct *RS = cast<ConstantStruct>(R);
- unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
- unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (unsigned i = 0; i != NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
- cast<Constant>(RS->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantVectorVal: {
- const ConstantVector *LV = cast<ConstantVector>(L);
- const ConstantVector *RV = cast<ConstantVector>(R);
- unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
- unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (uint64_t i = 0; i < NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
- cast<Constant>(RV->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantExprVal: {
- const ConstantExpr *LE = cast<ConstantExpr>(L);
- const ConstantExpr *RE = cast<ConstantExpr>(R);
- unsigned NumOperandsL = LE->getNumOperands();
- unsigned NumOperandsR = RE->getNumOperands();
- if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
- return Res;
- for (unsigned i = 0; i < NumOperandsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
- cast<Constant>(RE->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::BlockAddressVal: {
- const BlockAddress *LBA = cast<BlockAddress>(L);
- const BlockAddress *RBA = cast<BlockAddress>(R);
- if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
- return Res;
- if (LBA->getFunction() == RBA->getFunction()) {
- // They are BBs in the same function. Order by which comes first in the
- // BB order of the function. This order is deterministic.
- Function* F = LBA->getFunction();
- BasicBlock *LBB = LBA->getBasicBlock();
- BasicBlock *RBB = RBA->getBasicBlock();
- if (LBB == RBB)
- return 0;
- for(BasicBlock &BB : F->getBasicBlockList()) {
- if (&BB == LBB) {
- assert(&BB != RBB);
- return -1;
- }
- if (&BB == RBB)
- return 1;
- }
- llvm_unreachable("Basic Block Address does not point to a basic block in "
- "its function.");
- return -1;
- } else {
- // cmpValues said the functions are the same. So because they aren't
- // literally the same pointer, they must respectively be the left and
- // right functions.
- assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
- // cmpValues will tell us if these are equivalent BasicBlocks, in the
- // context of their respective functions.
- return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
- }
- }
- default: // Unknown constant, abort.
- LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
- llvm_unreachable("Constant ValueID not recognized.");
- return -1;
- }
-}
-
-int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
- uint64_t LNumber = GlobalNumbers->getNumber(L);
- uint64_t RNumber = GlobalNumbers->getNumber(R);
- return cmpNumbers(LNumber, RNumber);
-}
-
-/// cmpType - compares two types,
-/// defines total ordering among the types set.
-/// See method declaration comments for more details.
-int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
- PointerType *PTyL = dyn_cast<PointerType>(TyL);
- PointerType *PTyR = dyn_cast<PointerType>(TyR);
-
- const DataLayout &DL = FnL->getParent()->getDataLayout();
- if (PTyL && PTyL->getAddressSpace() == 0)
- TyL = DL.getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0)
- TyR = DL.getIntPtrType(TyR);
-
- if (TyL == TyR)
- return 0;
-
- if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
- return Res;
-
- switch (TyL->getTypeID()) {
- default:
- llvm_unreachable("Unknown type!");
- case Type::IntegerTyID:
- return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
- cast<IntegerType>(TyR)->getBitWidth());
- // TyL == TyR would have returned true earlier, because types are uniqued.
- case Type::VoidTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- case Type::MetadataTyID:
- case Type::TokenTyID:
- return 0;
-
- case Type::PointerTyID:
- assert(PTyL && PTyR && "Both types must be pointers here.");
- return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
-
- case Type::StructTyID: {
- StructType *STyL = cast<StructType>(TyL);
- StructType *STyR = cast<StructType>(TyR);
- if (STyL->getNumElements() != STyR->getNumElements())
- return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
-
- if (STyL->isPacked() != STyR->isPacked())
- return cmpNumbers(STyL->isPacked(), STyR->isPacked());
-
- for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
- if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
- return Res;
- }
- return 0;
- }
-
- case Type::FunctionTyID: {
- FunctionType *FTyL = cast<FunctionType>(TyL);
- FunctionType *FTyR = cast<FunctionType>(TyR);
- if (FTyL->getNumParams() != FTyR->getNumParams())
- return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
-
- if (FTyL->isVarArg() != FTyR->isVarArg())
- return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
-
- if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
- return Res;
-
- for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
- if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
- return Res;
- }
- return 0;
- }
-
- case Type::ArrayTyID:
- case Type::VectorTyID: {
- auto *STyL = cast<SequentialType>(TyL);
- auto *STyR = cast<SequentialType>(TyR);
- if (STyL->getNumElements() != STyR->getNumElements())
- return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
- return cmpTypes(STyL->getElementType(), STyR->getElementType());
- }
- }
-}
-
-// Determine whether the two operations are the same except that pointer-to-A
-// and pointer-to-B are equivalent. This should be kept in sync with
-// Instruction::isSameOperationAs.
-// Read method declaration comments for more details.
-int FunctionComparator::cmpOperations(const Instruction *L,
- const Instruction *R,
- bool &needToCmpOperands) const {
- needToCmpOperands = true;
- if (int Res = cmpValues(L, R))
- return Res;
-
- // Differences from Instruction::isSameOperationAs:
- // * replace type comparison with calls to cmpTypes.
- // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
- // * because of the above, we don't test for the tail bit on calls later on.
- if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
- return Res;
-
- if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
- needToCmpOperands = false;
- const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
- if (int Res =
- cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
- return Res;
- return cmpGEPs(GEPL, GEPR);
- }
-
- if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
- return Res;
-
- if (int Res = cmpTypes(L->getType(), R->getType()))
- return Res;
-
- if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
- R->getRawSubclassOptionalData()))
- return Res;
-
- // We have two instructions of identical opcode and #operands. Check to see
- // if all operands are the same type
- for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
- if (int Res =
- cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
- return Res;
- }
-
- // Check special state that is a part of some instructions.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
- if (int Res = cmpTypes(AI->getAllocatedType(),
- cast<AllocaInst>(R)->getAllocatedType()))
- return Res;
- return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
- if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
- return Res;
- if (int Res =
- cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
- return Res;
- if (int Res =
- cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
- return Res;
- if (int Res = cmpNumbers(LI->getSyncScopeID(),
- cast<LoadInst>(R)->getSyncScopeID()))
- return Res;
- return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
- cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
- if (int Res =
- cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
- return Res;
- if (int Res =
- cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
- return Res;
- if (int Res =
- cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(SI->getSyncScopeID(),
- cast<StoreInst>(R)->getSyncScopeID());
- }
- if (const CmpInst *CI = dyn_cast<CmpInst>(L))
- return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
- if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
- auto CSR = CallSite(const_cast<Instruction *>(R));
- if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
- return Res;
- if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
- return Res;
- if (int Res = cmpOperandBundlesSchema(L, R))
- return Res;
- if (const CallInst *CI = dyn_cast<CallInst>(L))
- if (int Res = cmpNumbers(CI->getTailCallKind(),
- cast<CallInst>(R)->getTailCallKind()))
- return Res;
- return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
- R->getMetadata(LLVMContext::MD_range));
- }
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
- ArrayRef<unsigned> LIndices = IVI->getIndices();
- ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
- if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
- return Res;
- for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
- if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
- return Res;
- }
- return 0;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
- ArrayRef<unsigned> LIndices = EVI->getIndices();
- ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
- if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
- return Res;
- for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
- if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
- return Res;
- }
- }
- if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
- if (int Res =
- cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(FI->getSyncScopeID(),
- cast<FenceInst>(R)->getSyncScopeID());
- }
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
- if (int Res = cmpNumbers(CXI->isVolatile(),
- cast<AtomicCmpXchgInst>(R)->isVolatile()))
- return Res;
- if (int Res = cmpNumbers(CXI->isWeak(),
- cast<AtomicCmpXchgInst>(R)->isWeak()))
- return Res;
- if (int Res =
- cmpOrderings(CXI->getSuccessOrdering(),
- cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
- return Res;
- if (int Res =
- cmpOrderings(CXI->getFailureOrdering(),
- cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
- return Res;
- return cmpNumbers(CXI->getSyncScopeID(),
- cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
- }
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
- if (int Res = cmpNumbers(RMWI->getOperation(),
- cast<AtomicRMWInst>(R)->getOperation()))
- return Res;
- if (int Res = cmpNumbers(RMWI->isVolatile(),
- cast<AtomicRMWInst>(R)->isVolatile()))
- return Res;
- if (int Res = cmpOrderings(RMWI->getOrdering(),
- cast<AtomicRMWInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(RMWI->getSyncScopeID(),
- cast<AtomicRMWInst>(R)->getSyncScopeID());
- }
- if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
- const PHINode *PNR = cast<PHINode>(R);
- // Ensure that in addition to the incoming values being identical
- // (checked by the caller of this function), the incoming blocks
- // are also identical.
- for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
- if (int Res =
- cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
- return Res;
- }
- }
- return 0;
-}
-
-// Determine whether two GEP operations perform the same underlying arithmetic.
-// Read method declaration comments for more details.
-int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
- const GEPOperator *GEPR) const {
- unsigned int ASL = GEPL->getPointerAddressSpace();
- unsigned int ASR = GEPR->getPointerAddressSpace();
-
- if (int Res = cmpNumbers(ASL, ASR))
- return Res;
-
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- const DataLayout &DL = FnL->getParent()->getDataLayout();
- unsigned BitWidth = DL.getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
- GEPR->accumulateConstantOffset(DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- if (int Res = cmpTypes(GEPL->getSourceElementType(),
- GEPR->getSourceElementType()))
- return Res;
-
- if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
- return Res;
-
- for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
- if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
- return Res;
- }
-
- return 0;
-}
-
-int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
- const InlineAsm *R) const {
- // InlineAsm's are uniqued. If they are the same pointer, obviously they are
- // the same, otherwise compare the fields.
- if (L == R)
- return 0;
- if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
- return Res;
- if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
- return Res;
- if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
- return Res;
- if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
- return Res;
- if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
- return Res;
- if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
- return Res;
- assert(L->getFunctionType() != R->getFunctionType());
- return 0;
-}
-
-/// Compare two values used by the two functions under pair-wise comparison. If
-/// this is the first time the values are seen, they're added to the mapping so
-/// that we will detect mismatches on next use.
-/// See comments in declaration for more details.
-int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
- // Catch self-reference case.
- if (L == FnL) {
- if (R == FnR)
- return 0;
- return -1;
- }
- if (R == FnR) {
- if (L == FnL)
- return 0;
- return 1;
- }
-
- const Constant *ConstL = dyn_cast<Constant>(L);
- const Constant *ConstR = dyn_cast<Constant>(R);
- if (ConstL && ConstR) {
- if (L == R)
- return 0;
- return cmpConstants(ConstL, ConstR);
- }
-
- if (ConstL)
- return 1;
- if (ConstR)
- return -1;
-
- const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
- const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
-
- if (InlineAsmL && InlineAsmR)
- return cmpInlineAsm(InlineAsmL, InlineAsmR);
- if (InlineAsmL)
- return 1;
- if (InlineAsmR)
- return -1;
-
- auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
- RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
-
- return cmpNumbers(LeftSN.first->second, RightSN.first->second);
-}
-
-// Test whether two basic blocks have equivalent behaviour.
-int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
- const BasicBlock *BBR) const {
- BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
- BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
-
- do {
- bool needToCmpOperands = true;
- if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
- return Res;
- if (needToCmpOperands) {
- assert(InstL->getNumOperands() == InstR->getNumOperands());
-
- for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
- Value *OpL = InstL->getOperand(i);
- Value *OpR = InstR->getOperand(i);
- if (int Res = cmpValues(OpL, OpR))
- return Res;
- // cmpValues should ensure this is true.
- assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
- }
- }
-
- ++InstL;
- ++InstR;
- } while (InstL != InstLE && InstR != InstRE);
-
- if (InstL != InstLE && InstR == InstRE)
- return 1;
- if (InstL == InstLE && InstR != InstRE)
- return -1;
- return 0;
-}
-
-int FunctionComparator::compareSignature() const {
- if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
- return Res;
-
- if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
- return Res;
-
- if (FnL->hasGC()) {
- if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
- return Res;
- }
-
- if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
- return Res;
-
- if (FnL->hasSection()) {
- if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
- return Res;
- }
-
- if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
- return Res;
-
- // TODO: if it's internal and only used in direct calls, we could handle this
- // case too.
- if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
- return Res;
-
- if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
- return Res;
-
- assert(FnL->arg_size() == FnR->arg_size() &&
- "Identically typed functions have different numbers of args!");
-
- // Visit the arguments so that they get enumerated in the order they're
- // passed in.
- for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
- ArgRI = FnR->arg_begin(),
- ArgLE = FnL->arg_end();
- ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
- if (cmpValues(&*ArgLI, &*ArgRI) != 0)
- llvm_unreachable("Arguments repeat!");
- }
- return 0;
-}
-
-// Test whether the two functions have equivalent behaviour.
-int FunctionComparator::compare() {
- beginCompare();
-
- if (int Res = compareSignature())
- return Res;
-
- // We do a CFG-ordered walk since the actual ordering of the blocks in the
- // linked list is immaterial. Our walk starts at the entry block for both
- // functions, then takes each block from each terminator in order. As an
- // artifact, this also means that unreachable blocks are ignored.
- SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
- SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
-
- FnLBBs.push_back(&FnL->getEntryBlock());
- FnRBBs.push_back(&FnR->getEntryBlock());
-
- VisitedBBs.insert(FnLBBs[0]);
- while (!FnLBBs.empty()) {
- const BasicBlock *BBL = FnLBBs.pop_back_val();
- const BasicBlock *BBR = FnRBBs.pop_back_val();
-
- if (int Res = cmpValues(BBL, BBR))
- return Res;
-
- if (int Res = cmpBasicBlocks(BBL, BBR))
- return Res;
-
- const Instruction *TermL = BBL->getTerminator();
- const Instruction *TermR = BBR->getTerminator();
-
- assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
- for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
- continue;
-
- FnLBBs.push_back(TermL->getSuccessor(i));
- FnRBBs.push_back(TermR->getSuccessor(i));
- }
- }
- return 0;
-}
-
-namespace {
-
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
- uint64_t Hash;
-
-public:
- // Initialize to random constant, so the state isn't zero.
- HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
-
- void add(uint64_t V) {
- Hash = hashing::detail::hash_16_bytes(Hash, V);
- }
-
- // No finishing is required, because the entire hash value is used.
- uint64_t getHash() { return Hash; }
-};
-
-} // end anonymous namespace
-
-// A function hash is calculated by considering only the number of arguments and
-// whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it useful
-// when possibly merging functions which are the same modulo constants and call
-// targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
- HashAccumulator64 H;
- H.add(F.isVarArg());
- H.add(F.arg_size());
-
- SmallVector<const BasicBlock *, 8> BBs;
- SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
-
- // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
- // accumulating the hash of the function "structure." (BB and opcode sequence)
- BBs.push_back(&F.getEntryBlock());
- VisitedBBs.insert(BBs[0]);
- while (!BBs.empty()) {
- const BasicBlock *BB = BBs.pop_back_val();
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
- H.add(45798);
- for (auto &Inst : *BB) {
- H.add(Inst.getOpcode());
- }
- const Instruction *Term = BB->getTerminator();
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
- continue;
- BBs.push_back(Term->getSuccessor(i));
- }
- }
- return H.getHash();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
deleted file mode 100644
index c9cc0990f237..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ /dev/null
@@ -1,313 +0,0 @@
-//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the FunctionImportGlobalProcessing class, used
-// to perform the necessary global value handling for function importing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-#include "llvm/IR/InstIterator.h"
-using namespace llvm;
-
-/// Checks if we should import SGV as a definition, otherwise import as a
-/// declaration.
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
-
- // Only import the globals requested for importing.
- if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
- return false;
-
- assert(!isa<GlobalAlias>(SGV) &&
- "Unexpected global alias in the import list.");
-
- // Otherwise yes.
- return true;
-}
-
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV) {
- if (!isPerformingImport())
- return false;
- return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
- GlobalsToImport);
-}
-
-bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
- const GlobalValue *SGV) {
- assert(SGV->hasLocalLinkage());
- // Both the imported references and the original local variable must
- // be promoted.
- if (!isPerformingImport() && !isModuleExporting())
- return false;
-
- if (isPerformingImport()) {
- assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
- !isNonRenamableLocal(*SGV)) &&
- "Attempting to promote non-renamable local");
- // We don't know for sure yet if we are importing this value (as either
- // a reference or a def), since we are simply walking all values in the
- // module. But by necessity if we end up importing it and it is local,
- // it must be promoted, so unconditionally promote all values in the
- // importing module.
- return true;
- }
-
- // When exporting, consult the index. We can have more than one local
- // with the same GUID, in the case of same-named locals in different but
- // same-named source files that were compiled in their respective directories
- // (so the source file name and resulting GUID is the same). Find the one
- // in this module.
- auto Summary = ImportIndex.findSummaryInModule(
- SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
- assert(Summary && "Missing summary for global value when exporting");
- auto Linkage = Summary->linkage();
- if (!GlobalValue::isLocalLinkage(Linkage)) {
- assert(!isNonRenamableLocal(*SGV) &&
- "Attempting to promote non-renamable local");
- return true;
- }
-
- return false;
-}
-
-#ifndef NDEBUG
-bool FunctionImportGlobalProcessing::isNonRenamableLocal(
- const GlobalValue &GV) const {
- if (!GV.hasLocalLinkage())
- return false;
- // This needs to stay in sync with the logic in buildModuleSummaryIndex.
- if (GV.hasSection())
- return true;
- if (Used.count(const_cast<GlobalValue *>(&GV)))
- return true;
- return false;
-}
-#endif
-
-std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
- bool DoPromote) {
- // For locals that must be promoted to global scope, ensure that
- // the promoted name uniquely identifies the copy in the original module,
- // using the ID assigned during combined index creation. When importing,
- // we rename all locals (not just those that are promoted) in order to
- // avoid naming conflicts between locals imported from different modules.
- if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
- return ModuleSummaryIndex::getGlobalNameForLocal(
- SGV->getName(),
- ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
- return SGV->getName();
-}
-
-GlobalValue::LinkageTypes
-FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
- bool DoPromote) {
- // Any local variable that is referenced by an exported function needs
- // to be promoted to global scope. Since we don't currently know which
- // functions reference which local variables/functions, we must treat
- // all as potentially exported if this module is exporting anything.
- if (isModuleExporting()) {
- if (SGV->hasLocalLinkage() && DoPromote)
- return GlobalValue::ExternalLinkage;
- return SGV->getLinkage();
- }
-
- // Otherwise, if we aren't importing, no linkage change is needed.
- if (!isPerformingImport())
- return SGV->getLinkage();
-
- switch (SGV->getLinkage()) {
- case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::ExternalLinkage:
- // External and linkonce definitions are converted to available_externally
- // definitions upon import, so that they are available for inlining
- // and/or optimization, but are turned into declarations later
- // during the EliminateAvailableExternally pass.
- if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
- return GlobalValue::AvailableExternallyLinkage;
- // An imported external declaration stays external.
- return SGV->getLinkage();
-
- case GlobalValue::AvailableExternallyLinkage:
- // An imported available_externally definition converts
- // to external if imported as a declaration.
- if (!doImportAsDefinition(SGV))
- return GlobalValue::ExternalLinkage;
- // An imported available_externally declaration stays that way.
- return SGV->getLinkage();
-
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::WeakAnyLinkage:
- // Can't import linkonce_any/weak_any definitions correctly, or we might
- // change the program semantics, since the linker will pick the first
- // linkonce_any/weak_any definition and importing would change the order
- // they are seen by the linker. The module linking caller needs to enforce
- // this.
- assert(!doImportAsDefinition(SGV));
- // If imported as a declaration, it becomes external_weak.
- return SGV->getLinkage();
-
- case GlobalValue::WeakODRLinkage:
- // For weak_odr linkage, there is a guarantee that all copies will be
- // equivalent, so the issue described above for weak_any does not exist,
- // and the definition can be imported. It can be treated similarly
- // to an imported externally visible global value.
- if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
- return GlobalValue::AvailableExternallyLinkage;
- else
- return GlobalValue::ExternalLinkage;
-
- case GlobalValue::AppendingLinkage:
- // It would be incorrect to import an appending linkage variable,
- // since it would cause global constructors/destructors to be
- // executed multiple times. This should have already been handled
- // by linkIfNeeded, and we will assert in shouldLinkFromSource
- // if we try to import, so we simply return AppendingLinkage.
- return GlobalValue::AppendingLinkage;
-
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- // If we are promoting the local to global scope, it is handled
- // similarly to a normal externally visible global.
- if (DoPromote) {
- if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
- return GlobalValue::AvailableExternallyLinkage;
- else
- return GlobalValue::ExternalLinkage;
- }
- // A non-promoted imported local definition stays local.
- // The ThinLTO pass will eventually force-import their definitions.
- return SGV->getLinkage();
-
- case GlobalValue::ExternalWeakLinkage:
- // External weak doesn't apply to definitions, must be a declaration.
- assert(!doImportAsDefinition(SGV));
- // Linkage stays external_weak.
- return SGV->getLinkage();
-
- case GlobalValue::CommonLinkage:
- // Linkage stays common on definitions.
- // The ThinLTO pass will eventually force-import their definitions.
- return SGV->getLinkage();
- }
-
- llvm_unreachable("unknown linkage type");
-}
-
-void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
-
- ValueInfo VI;
- if (GV.hasName()) {
- VI = ImportIndex.getValueInfo(GV.getGUID());
- // Set synthetic function entry counts.
- if (VI && ImportIndex.hasSyntheticEntryCounts()) {
- if (Function *F = dyn_cast<Function>(&GV)) {
- if (!F->isDeclaration()) {
- for (auto &S : VI.getSummaryList()) {
- FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
- if (FS->modulePath() == M.getModuleIdentifier()) {
- F->setEntryCount(Function::ProfileCount(FS->entryCount(),
- Function::PCT_Synthetic));
- break;
- }
- }
- }
- }
- }
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
- if (VI && VI.isDSOLocal()) {
- GV.setDSOLocal(true);
- if (GV.hasDLLImportStorageClass())
- GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);
- }
- }
-
- // Mark read/write-only variables which can be imported with specific
- // attribute. We can't internalize them now because IRMover will fail
- // to link variable definitions to their external declarations during
- // ThinLTO import. We'll internalize read-only variables later, after
- // import is finished. See internalizeGVsAfterImport.
- //
- // If global value dead stripping is not enabled in summary then
- // propagateConstants hasn't been run. We can't internalize GV
- // in such case.
- if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
- const auto &SL = VI.getSummaryList();
- auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
- // At this stage "maybe" is "definitely"
- if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
- cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
- }
-
- bool DoPromote = false;
- if (GV.hasLocalLinkage() &&
- ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
- // Save the original name string before we rename GV below.
- auto Name = GV.getName().str();
- // Once we change the name or linkage it is difficult to determine
- // again whether we should promote since shouldPromoteLocalToGlobal needs
- // to locate the summary (based on GUID from name and linkage). Therefore,
- // use DoPromote result saved above.
- GV.setName(getName(&GV, DoPromote));
- GV.setLinkage(getLinkage(&GV, DoPromote));
- if (!GV.hasLocalLinkage())
- GV.setVisibility(GlobalValue::HiddenVisibility);
-
- // If we are renaming a COMDAT leader, ensure that we record the COMDAT
- // for later renaming as well. This is required for COFF.
- if (const auto *C = GV.getComdat())
- if (C->getName() == Name)
- RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
- } else
- GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
-
- // Remove functions imported as available externally defs from comdats,
- // as this is a declaration for the linker, and will be dropped eventually.
- // It is illegal for comdats to contain declarations.
- auto *GO = dyn_cast<GlobalObject>(&GV);
- if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
- // The IRMover should not have placed any imported declarations in
- // a comdat, so the only declaration that should be in a comdat
- // at this point would be a definition imported as available_externally.
- assert(GO->hasAvailableExternallyLinkage() &&
- "Expected comdat on definition (possibly available external)");
- GO->setComdat(nullptr);
- }
-}
-
-void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
- for (GlobalVariable &GV : M.globals())
- processGlobalForThinLTO(GV);
- for (Function &SF : M)
- processGlobalForThinLTO(SF);
- for (GlobalAlias &GA : M.aliases())
- processGlobalForThinLTO(GA);
-
- // Replace any COMDATS that required renaming (because the COMDAT leader was
- // promoted and renamed).
- if (!RenamedComdats.empty())
- for (auto &GO : M.global_objects())
- if (auto *C = GO.getComdat()) {
- auto Replacement = RenamedComdats.find(C);
- if (Replacement != RenamedComdats.end())
- GO.setComdat(Replacement->second);
- }
-}
-
-bool FunctionImportGlobalProcessing::run() {
- processGlobalsForThinLTO();
- return false;
-}
-
-bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
- SetVector<GlobalValue *> *GlobalsToImport) {
- FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
- return ThinLTOProcessing.run();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
deleted file mode 100644
index a2942869130d..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
-#include <algorithm>
-#include <cassert>
-
-using namespace llvm;
-
-/// Return the stronger of the two ordering. If the two orderings are acquire
-/// and release, then return AcquireRelease.
-///
-static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
- (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
- return AtomicOrdering::AcquireRelease;
- return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
-}
-
-/// It is safe to destroy a constant iff it is only used by constants itself.
-/// Note that constants cannot be cyclic, so this test is pretty easy to
-/// implement recursively.
-///
-bool llvm::isSafeToDestroyConstant(const Constant *C) {
- if (isa<GlobalValue>(C))
- return false;
-
- if (isa<ConstantData>(C))
- return false;
-
- for (const User *U : C->users())
- if (const Constant *CU = dyn_cast<Constant>(U)) {
- if (!isSafeToDestroyConstant(CU))
- return false;
- } else
- return false;
- return true;
-}
-
-static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
- SmallPtrSetImpl<const Value *> &VisitedUsers) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (GV->isExternallyInitialized())
- GS.StoredType = GlobalStatus::StoredOnce;
-
- for (const Use &U : V->uses()) {
- const User *UR = U.getUser();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
- GS.HasNonInstructionUser = true;
-
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType()))
- return true;
-
- // FIXME: Do we need to add constexpr selects to VisitedUsers?
- if (analyzeGlobalAux(CE, GS, VisitedUsers))
- return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
- if (!GS.HasMultipleAccessingFunctions) {
- const Function *F = I->getParent()->getParent();
- if (!GS.AccessingFunction)
- GS.AccessingFunction = F;
- else if (GS.AccessingFunction != F)
- GS.HasMultipleAccessingFunctions = true;
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
- GS.IsLoaded = true;
- // Don't hack on volatile loads.
- if (LI->isVolatile())
- return true;
- GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Don't allow a store OF the address, only stores TO the address.
- if (SI->getOperand(0) == V)
- return true;
-
- // Don't hack on volatile stores.
- if (SI->isVolatile())
- return true;
-
- GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
-
- // If this is a direct store to the global (i.e., the global is a scalar
- // value, not an aggregate), keep more specific information about
- // stores.
- if (GS.StoredType != GlobalStatus::Stored) {
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(SI->getOperand(1))) {
- Value *StoredVal = SI->getOperand(0);
-
- if (Constant *C = dyn_cast<Constant>(StoredVal)) {
- if (C->isThreadDependent()) {
- // The stored value changes between threads; don't track it.
- return true;
- }
- }
-
- if (GV->hasInitializer() && StoredVal == GV->getInitializer()) {
- if (GS.StoredType < GlobalStatus::InitializerStored)
- GS.StoredType = GlobalStatus::InitializerStored;
- } else if (isa<LoadInst>(StoredVal) &&
- cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
- if (GS.StoredType < GlobalStatus::InitializerStored)
- GS.StoredType = GlobalStatus::InitializerStored;
- } else if (GS.StoredType < GlobalStatus::StoredOnce) {
- GS.StoredType = GlobalStatus::StoredOnce;
- GS.StoredOnceValue = StoredVal;
- } else if (GS.StoredType == GlobalStatus::StoredOnce &&
- GS.StoredOnceValue == StoredVal) {
- // noop.
- } else {
- GS.StoredType = GlobalStatus::Stored;
- }
- } else {
- GS.StoredType = GlobalStatus::Stored;
- }
- }
- } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
- // Skip over bitcasts and GEPs; we don't care about the type or offset
- // of the pointer.
- if (analyzeGlobalAux(I, GS, VisitedUsers))
- return true;
- } else if (isa<SelectInst>(I) || isa<PHINode>(I)) {
- // Look through selects and PHIs to find if the pointer is
- // conditionally accessed. Make sure we only visit an instruction
- // once; otherwise, we can get infinite recursion or exponential
- // compile time.
- if (VisitedUsers.insert(I).second)
- if (analyzeGlobalAux(I, GS, VisitedUsers))
- return true;
- } else if (isa<CmpInst>(I)) {
- GS.IsCompared = true;
- } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
- if (MTI->isVolatile())
- return true;
- if (MTI->getArgOperand(0) == V)
- GS.StoredType = GlobalStatus::Stored;
- if (MTI->getArgOperand(1) == V)
- GS.IsLoaded = true;
- } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
- assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
- if (MSI->isVolatile())
- return true;
- GS.StoredType = GlobalStatus::Stored;
- } else if (auto C = ImmutableCallSite(I)) {
- if (!C.isCallee(&U))
- return true;
- GS.IsLoaded = true;
- } else {
- return true; // Any other non-load instruction might take address!
- }
- } else if (const Constant *C = dyn_cast<Constant>(UR)) {
- GS.HasNonInstructionUser = true;
- // We might have a dead and dangling constant hanging off of here.
- if (!isSafeToDestroyConstant(C))
- return true;
- } else {
- GS.HasNonInstructionUser = true;
- // Otherwise must be some other user.
- return true;
- }
- }
-
- return false;
-}
-
-GlobalStatus::GlobalStatus() = default;
-
-bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
- SmallPtrSet<const Value *, 16> VisitedUsers;
- return analyzeGlobalAux(V, GS, VisitedUsers);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
deleted file mode 100644
index 34c32d9c0c98..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Utils that are used to perform transformations related to guards and their
-// conditions.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/GuardUtils.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-static cl::opt<uint32_t> PredicatePassBranchWeight(
- "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
- cl::desc("The probability of a guard failing is assumed to be the "
- "reciprocal of this value (default = 1 << 20)"));
-
-void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
- CallInst *Guard) {
- OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
- SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
-
- auto *CheckBB = Guard->getParent();
- auto *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true);
-
- auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
-
- // SplitBlockAndInsertIfThen inserts control flow that branches to
- // DeoptBlockTerm if the condition is true. We want the opposite.
- CheckBI->swapSuccessors();
-
- CheckBI->getSuccessor(0)->setName("guarded");
- CheckBI->getSuccessor(1)->setName("deopt");
-
- if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit))
- CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
-
- MDBuilder MDB(Guard->getContext());
- CheckBI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(PredicatePassBranchWeight, 1));
-
- IRBuilder<> B(DeoptBlockTerm);
- auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
-
- if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
- B.CreateRetVoid();
- } else {
- DeoptCall->setName("deoptcall");
- B.CreateRet(DeoptCall);
- }
-
- DeoptCall->setCallingConv(Guard->getCallingConv());
- DeoptBlockTerm->eraseFromParent();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
deleted file mode 100644
index 8041e66e6c4c..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Generating inliner statistics for imported functions, mostly useful for
-// ThinLTO.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <iomanip>
-#include <sstream>
-using namespace llvm;
-
-ImportedFunctionsInliningStatistics::InlineGraphNode &
-ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
-
- auto &ValueLookup = NodesMap[F.getName()];
- if (!ValueLookup) {
- ValueLookup = llvm::make_unique<InlineGraphNode>();
- ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
- }
- return *ValueLookup;
-}
-
-void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
- const Function &Callee) {
-
- InlineGraphNode &CallerNode = createInlineGraphNode(Caller);
- InlineGraphNode &CalleeNode = createInlineGraphNode(Callee);
- CalleeNode.NumberOfInlines++;
-
- if (!CallerNode.Imported && !CalleeNode.Imported) {
- // Direct inline from not imported callee to not imported caller, so we
- // don't have to add this to graph. It might be very helpful if you wanna
- // get the inliner statistics in compile step where there are no imported
- // functions. In this case the graph would be empty.
- CalleeNode.NumberOfRealInlines++;
- return;
- }
-
- CallerNode.InlinedCallees.push_back(&CalleeNode);
- if (!CallerNode.Imported) {
- // We could avoid second lookup, but it would make the code ultra ugly.
- auto It = NodesMap.find(Caller.getName());
- assert(It != NodesMap.end() && "The node should be already there.");
- // Save Caller as a starting node for traversal. The string has to be one
- // from map because Caller can disappear (and function name with it).
- NonImportedCallers.push_back(It->first());
- }
-}
-
-void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
- ModuleName = M.getName();
- for (const auto &F : M.functions()) {
- if (F.isDeclaration())
- continue;
- AllFunctions++;
- ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
- }
-}
-static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
- const char *PercentageOfMsg,
- bool LineEnd = true) {
- double Result = 0;
- if (All != 0)
- Result = 100 * static_cast<double>(Fraction) / All;
-
- std::stringstream Str;
- Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result
- << "% of " << PercentageOfMsg << "]";
- if (LineEnd)
- Str << "\n";
- return Str.str();
-}
-
-void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
- calculateRealInlines();
- NonImportedCallers.clear();
-
- int32_t InlinedImportedFunctionsCount = 0;
- int32_t InlinedNotImportedFunctionsCount = 0;
-
- int32_t InlinedImportedFunctionsToImportingModuleCount = 0;
- int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0;
-
- const auto SortedNodes = getSortedNodes();
- std::string Out;
- Out.reserve(5000);
- raw_string_ostream Ostream(Out);
-
- Ostream << "------- Dumping inliner stats for [" << ModuleName
- << "] -------\n";
-
- if (Verbose)
- Ostream << "-- List of inlined functions:\n";
-
- for (const auto &Node : SortedNodes) {
- assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines);
- if (Node->second->NumberOfInlines == 0)
- continue;
-
- if (Node->second->Imported) {
- InlinedImportedFunctionsCount++;
- InlinedImportedFunctionsToImportingModuleCount +=
- int(Node->second->NumberOfRealInlines > 0);
- } else {
- InlinedNotImportedFunctionsCount++;
- InlinedNotImportedFunctionsToImportingModuleCount +=
- int(Node->second->NumberOfRealInlines > 0);
- }
-
- if (Verbose)
- Ostream << "Inlined "
- << (Node->second->Imported ? "imported " : "not imported ")
- << "function [" << Node->first() << "]"
- << ": #inlines = " << Node->second->NumberOfInlines
- << ", #inlines_to_importing_module = "
- << Node->second->NumberOfRealInlines << "\n";
- }
-
- auto InlinedFunctionsCount =
- InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount;
- auto NotImportedFuncCount = AllFunctions - ImportedFunctions;
- auto ImportedNotInlinedIntoModule =
- ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount;
-
- Ostream << "-- Summary:\n"
- << "All functions: " << AllFunctions
- << ", imported functions: " << ImportedFunctions << "\n"
- << getStatString("inlined functions", InlinedFunctionsCount,
- AllFunctions, "all functions")
- << getStatString("imported functions inlined anywhere",
- InlinedImportedFunctionsCount, ImportedFunctions,
- "imported functions")
- << getStatString("imported functions inlined into importing module",
- InlinedImportedFunctionsToImportingModuleCount,
- ImportedFunctions, "imported functions",
- /*LineEnd=*/false)
- << getStatString(", remaining", ImportedNotInlinedIntoModule,
- ImportedFunctions, "imported functions")
- << getStatString("non-imported functions inlined anywhere",
- InlinedNotImportedFunctionsCount,
- NotImportedFuncCount, "non-imported functions")
- << getStatString(
- "non-imported functions inlined into importing module",
- InlinedNotImportedFunctionsToImportingModuleCount,
- NotImportedFuncCount, "non-imported functions");
- Ostream.flush();
- dbgs() << Out;
-}
-
-void ImportedFunctionsInliningStatistics::calculateRealInlines() {
- // Removing duplicated Callers.
- llvm::sort(NonImportedCallers);
- NonImportedCallers.erase(
- std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
- NonImportedCallers.end());
-
- for (const auto &Name : NonImportedCallers) {
- auto &Node = *NodesMap[Name];
- if (!Node.Visited)
- dfs(Node);
- }
-}
-
-void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) {
- assert(!GraphNode.Visited);
- GraphNode.Visited = true;
- for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) {
- InlinedFunctionNode->NumberOfRealInlines++;
- if (!InlinedFunctionNode->Visited)
- dfs(*InlinedFunctionNode);
- }
-}
-
-ImportedFunctionsInliningStatistics::SortedNodesTy
-ImportedFunctionsInliningStatistics::getSortedNodes() {
- SortedNodesTy SortedNodes;
- SortedNodes.reserve(NodesMap.size());
- for (const NodesMapTy::value_type& Node : NodesMap)
- SortedNodes.push_back(&Node);
-
- llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
- const SortedNodesTy::value_type &Rhs) {
- if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
- return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
- return Lhs->second->NumberOfRealInlines >
- Rhs->second->NumberOfRealInlines;
- return Lhs->first() < Rhs->first();
- });
- return SortedNodes;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
deleted file mode 100644
index a7f0f7ac5d61..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ /dev/null
@@ -1,2417 +0,0 @@
-//===- InlineFunction.cpp - Code to perform function inlining -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements inlining of a function into a call site, resolving
-// parameters and the return value as appropriate.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <limits>
-#include <string>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-using ProfileCount = Function::ProfileCount;
-
-static cl::opt<bool>
-EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
- cl::Hidden,
- cl::desc("Convert noalias attributes to metadata during inlining."));
-
-static cl::opt<bool>
-PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
- cl::init(true), cl::Hidden,
- cl::desc("Convert align attributes to assumptions during inlining."));
-
-llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
-}
-
-namespace {
-
- /// A class for recording information about inlining a landing pad.
- class LandingPadInliningInfo {
- /// Destination of the invoke's unwind.
- BasicBlock *OuterResumeDest;
-
- /// Destination for the callee's resume.
- BasicBlock *InnerResumeDest = nullptr;
-
- /// LandingPadInst associated with the invoke.
- LandingPadInst *CallerLPad = nullptr;
-
- /// PHI for EH values from landingpad insts.
- PHINode *InnerEHValuesPHI = nullptr;
-
- SmallVector<Value*, 8> UnwindDestPHIValues;
-
- public:
- LandingPadInliningInfo(InvokeInst *II)
- : OuterResumeDest(II->getUnwindDest()) {
- // If there are PHI nodes in the unwind destination block, we need to keep
- // track of which values came into them from the invoke before removing
- // the edge from this block.
- BasicBlock *InvokeBB = II->getParent();
- BasicBlock::iterator I = OuterResumeDest->begin();
- for (; isa<PHINode>(I); ++I) {
- // Save the value to use for this edge.
- PHINode *PHI = cast<PHINode>(I);
- UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
- }
-
- CallerLPad = cast<LandingPadInst>(I);
- }
-
- /// The outer unwind destination is the target of
- /// unwind edges introduced for calls within the inlined function.
- BasicBlock *getOuterResumeDest() const {
- return OuterResumeDest;
- }
-
- BasicBlock *getInnerResumeDest();
-
- LandingPadInst *getLandingPadInst() const { return CallerLPad; }
-
- /// Forward the 'resume' instruction to the caller's landing pad block.
- /// When the landing pad block has only one predecessor, this is
- /// a simple branch. When there is more than one predecessor, we need to
- /// split the landing pad block after the landingpad instruction and jump
- /// to there.
- void forwardResume(ResumeInst *RI,
- SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
-
- /// Add incoming-PHI values to the unwind destination block for the given
- /// basic block, using the values for the original invoke's source block.
- void addIncomingPHIValuesFor(BasicBlock *BB) const {
- addIncomingPHIValuesForInto(BB, OuterResumeDest);
- }
-
- void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
- BasicBlock::iterator I = dest->begin();
- for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
- PHINode *phi = cast<PHINode>(I);
- phi->addIncoming(UnwindDestPHIValues[i], src);
- }
- }
- };
-
-} // end anonymous namespace
-
-/// Get or create a target for the branch from ResumeInsts.
-BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
- if (InnerResumeDest) return InnerResumeDest;
-
- // Split the landing pad.
- BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
- InnerResumeDest =
- OuterResumeDest->splitBasicBlock(SplitPoint,
- OuterResumeDest->getName() + ".body");
-
- // The number of incoming edges we expect to the inner landing pad.
- const unsigned PHICapacity = 2;
-
- // Create corresponding new PHIs for all the PHIs in the outer landing pad.
- Instruction *InsertPoint = &InnerResumeDest->front();
- BasicBlock::iterator I = OuterResumeDest->begin();
- for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
- PHINode *OuterPHI = cast<PHINode>(I);
- PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
- OuterPHI->getName() + ".lpad-body",
- InsertPoint);
- OuterPHI->replaceAllUsesWith(InnerPHI);
- InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
- }
-
- // Create a PHI for the exception values.
- InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
- "eh.lpad-body", InsertPoint);
- CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
- InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
-
- // All done.
- return InnerResumeDest;
-}
-
-/// Forward the 'resume' instruction to the caller's landing pad block.
-/// When the landing pad block has only one predecessor, this is a simple
-/// branch. When there is more than one predecessor, we need to split the
-/// landing pad block after the landingpad instruction and jump to there.
-void LandingPadInliningInfo::forwardResume(
- ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
- BasicBlock *Dest = getInnerResumeDest();
- BasicBlock *Src = RI->getParent();
-
- BranchInst::Create(Dest, Src);
-
- // Update the PHIs in the destination. They were inserted in an order which
- // makes this work.
- addIncomingPHIValuesForInto(Src, Dest);
-
- InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
- RI->eraseFromParent();
-}
-
-/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
-static Value *getParentPad(Value *EHPad) {
- if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
- return FPI->getParentPad();
- return cast<CatchSwitchInst>(EHPad)->getParentPad();
-}
-
-using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
-
-/// Helper for getUnwindDestToken that does the descendant-ward part of
-/// the search.
-static Value *getUnwindDestTokenHelper(Instruction *EHPad,
- UnwindDestMemoTy &MemoMap) {
- SmallVector<Instruction *, 8> Worklist(1, EHPad);
-
- while (!Worklist.empty()) {
- Instruction *CurrentPad = Worklist.pop_back_val();
- // We only put pads on the worklist that aren't in the MemoMap. When
- // we find an unwind dest for a pad we may update its ancestors, but
- // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
- // so they should never get updated while queued on the worklist.
- assert(!MemoMap.count(CurrentPad));
- Value *UnwindDestToken = nullptr;
- if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
- if (CatchSwitch->hasUnwindDest()) {
- UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
- } else {
- // Catchswitch doesn't have a 'nounwind' variant, and one might be
- // annotated as "unwinds to caller" when really it's nounwind (see
- // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
- // parent's unwind dest from this. We can check its catchpads'
- // descendants, since they might include a cleanuppad with an
- // "unwinds to caller" cleanupret, which can be trusted.
- for (auto HI = CatchSwitch->handler_begin(),
- HE = CatchSwitch->handler_end();
- HI != HE && !UnwindDestToken; ++HI) {
- BasicBlock *HandlerBlock = *HI;
- auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
- for (User *Child : CatchPad->users()) {
- // Intentionally ignore invokes here -- since the catchswitch is
- // marked "unwind to caller", it would be a verifier error if it
- // contained an invoke which unwinds out of it, so any invoke we'd
- // encounter must unwind to some child of the catch.
- if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
- continue;
-
- Instruction *ChildPad = cast<Instruction>(Child);
- auto Memo = MemoMap.find(ChildPad);
- if (Memo == MemoMap.end()) {
- // Haven't figured out this child pad yet; queue it.
- Worklist.push_back(ChildPad);
- continue;
- }
- // We've already checked this child, but might have found that
- // it offers no proof either way.
- Value *ChildUnwindDestToken = Memo->second;
- if (!ChildUnwindDestToken)
- continue;
- // We already know the child's unwind dest, which can either
- // be ConstantTokenNone to indicate unwind to caller, or can
- // be another child of the catchpad. Only the former indicates
- // the unwind dest of the catchswitch.
- if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
- UnwindDestToken = ChildUnwindDestToken;
- break;
- }
- assert(getParentPad(ChildUnwindDestToken) == CatchPad);
- }
- }
- }
- } else {
- auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
- for (User *U : CleanupPad->users()) {
- if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
- if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
- UnwindDestToken = RetUnwindDest->getFirstNonPHI();
- else
- UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
- break;
- }
- Value *ChildUnwindDestToken;
- if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
- ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
- } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
- Instruction *ChildPad = cast<Instruction>(U);
- auto Memo = MemoMap.find(ChildPad);
- if (Memo == MemoMap.end()) {
- // Haven't resolved this child yet; queue it and keep searching.
- Worklist.push_back(ChildPad);
- continue;
- }
- // We've checked this child, but still need to ignore it if it
- // had no proof either way.
- ChildUnwindDestToken = Memo->second;
- if (!ChildUnwindDestToken)
- continue;
- } else {
- // Not a relevant user of the cleanuppad
- continue;
- }
- // In a well-formed program, the child/invoke must either unwind to
- // an(other) child of the cleanup, or exit the cleanup. In the
- // first case, continue searching.
- if (isa<Instruction>(ChildUnwindDestToken) &&
- getParentPad(ChildUnwindDestToken) == CleanupPad)
- continue;
- UnwindDestToken = ChildUnwindDestToken;
- break;
- }
- }
- // If we haven't found an unwind dest for CurrentPad, we may have queued its
- // children, so move on to the next in the worklist.
- if (!UnwindDestToken)
- continue;
-
- // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
- // any ancestors of CurrentPad up to but not including UnwindDestToken's
- // parent pad. Record this in the memo map, and check to see if the
- // original EHPad being queried is one of the ones exited.
- Value *UnwindParent;
- if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
- UnwindParent = getParentPad(UnwindPad);
- else
- UnwindParent = nullptr;
- bool ExitedOriginalPad = false;
- for (Instruction *ExitedPad = CurrentPad;
- ExitedPad && ExitedPad != UnwindParent;
- ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
- // Skip over catchpads since they just follow their catchswitches.
- if (isa<CatchPadInst>(ExitedPad))
- continue;
- MemoMap[ExitedPad] = UnwindDestToken;
- ExitedOriginalPad |= (ExitedPad == EHPad);
- }
-
- if (ExitedOriginalPad)
- return UnwindDestToken;
-
- // Continue the search.
- }
-
- // No definitive information is contained within this funclet.
- return nullptr;
-}
-
-/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
-/// return that pad instruction. If it unwinds to caller, return
-/// ConstantTokenNone. If it does not have a definitive unwind destination,
-/// return nullptr.
-///
-/// This routine gets invoked for calls in funclets in inlinees when inlining
-/// an invoke. Since many funclets don't have calls inside them, it's queried
-/// on-demand rather than building a map of pads to unwind dests up front.
-/// Determining a funclet's unwind dest may require recursively searching its
-/// descendants, and also ancestors and cousins if the descendants don't provide
-/// an answer. Since most funclets will have their unwind dest immediately
-/// available as the unwind dest of a catchswitch or cleanupret, this routine
-/// searches top-down from the given pad and then up. To avoid worst-case
-/// quadratic run-time given that approach, it uses a memo map to avoid
-/// re-processing funclet trees. The callers that rewrite the IR as they go
-/// take advantage of this, for correctness, by checking/forcing rewritten
-/// pads' entries to match the original callee view.
-static Value *getUnwindDestToken(Instruction *EHPad,
- UnwindDestMemoTy &MemoMap) {
- // Catchpads unwind to the same place as their catchswitch;
- // redirct any queries on catchpads so the code below can
- // deal with just catchswitches and cleanuppads.
- if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
- EHPad = CPI->getCatchSwitch();
-
- // Check if we've already determined the unwind dest for this pad.
- auto Memo = MemoMap.find(EHPad);
- if (Memo != MemoMap.end())
- return Memo->second;
-
- // Search EHPad and, if necessary, its descendants.
- Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
- assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
- if (UnwindDestToken)
- return UnwindDestToken;
-
- // No information is available for this EHPad from itself or any of its
- // descendants. An unwind all the way out to a pad in the caller would
- // need also to agree with the unwind dest of the parent funclet, so
- // search up the chain to try to find a funclet with information. Put
- // null entries in the memo map to avoid re-processing as we go up.
- MemoMap[EHPad] = nullptr;
-#ifndef NDEBUG
- SmallPtrSet<Instruction *, 4> TempMemos;
- TempMemos.insert(EHPad);
-#endif
- Instruction *LastUselessPad = EHPad;
- Value *AncestorToken;
- for (AncestorToken = getParentPad(EHPad);
- auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
- AncestorToken = getParentPad(AncestorToken)) {
- // Skip over catchpads since they just follow their catchswitches.
- if (isa<CatchPadInst>(AncestorPad))
- continue;
- // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
- // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
- // call to getUnwindDestToken, that would mean that AncestorPad had no
- // information in itself, its descendants, or its ancestors. If that
- // were the case, then we should also have recorded the lack of information
- // for the descendant that we're coming from. So assert that we don't
- // find a null entry in the MemoMap for AncestorPad.
- assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
- auto AncestorMemo = MemoMap.find(AncestorPad);
- if (AncestorMemo == MemoMap.end()) {
- UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
- } else {
- UnwindDestToken = AncestorMemo->second;
- }
- if (UnwindDestToken)
- break;
- LastUselessPad = AncestorPad;
- MemoMap[LastUselessPad] = nullptr;
-#ifndef NDEBUG
- TempMemos.insert(LastUselessPad);
-#endif
- }
-
- // We know that getUnwindDestTokenHelper was called on LastUselessPad and
- // returned nullptr (and likewise for EHPad and any of its ancestors up to
- // LastUselessPad), so LastUselessPad has no information from below. Since
- // getUnwindDestTokenHelper must investigate all downward paths through
- // no-information nodes to prove that a node has no information like this,
- // and since any time it finds information it records it in the MemoMap for
- // not just the immediately-containing funclet but also any ancestors also
- // exited, it must be the case that, walking downward from LastUselessPad,
- // visiting just those nodes which have not been mapped to an unwind dest
- // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
- // they are just used to keep getUnwindDestTokenHelper from repeating work),
- // any node visited must have been exhaustively searched with no information
- // for it found.
- SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
- while (!Worklist.empty()) {
- Instruction *UselessPad = Worklist.pop_back_val();
- auto Memo = MemoMap.find(UselessPad);
- if (Memo != MemoMap.end() && Memo->second) {
- // Here the name 'UselessPad' is a bit of a misnomer, because we've found
- // that it is a funclet that does have information about unwinding to
- // a particular destination; its parent was a useless pad.
- // Since its parent has no information, the unwind edge must not escape
- // the parent, and must target a sibling of this pad. This local unwind
- // gives us no information about EHPad. Leave it and the subtree rooted
- // at it alone.
- assert(getParentPad(Memo->second) == getParentPad(UselessPad));
- continue;
- }
- // We know we don't have information for UselesPad. If it has an entry in
- // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
- // added on this invocation of getUnwindDestToken; if a previous invocation
- // recorded nullptr, it would have had to prove that the ancestors of
- // UselessPad, which include LastUselessPad, had no information, and that
- // in turn would have required proving that the descendants of
- // LastUselesPad, which include EHPad, have no information about
- // LastUselessPad, which would imply that EHPad was mapped to nullptr in
- // the MemoMap on that invocation, which isn't the case if we got here.
- assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
- // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
- // information that we'd be contradicting by making a map entry for it
- // (which is something that getUnwindDestTokenHelper must have proved for
- // us to get here). Just assert on is direct users here; the checks in
- // this downward walk at its descendants will verify that they don't have
- // any unwind edges that exit 'UselessPad' either (i.e. they either have no
- // unwind edges or unwind to a sibling).
- MemoMap[UselessPad] = UnwindDestToken;
- if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
- assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
- for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
- auto *CatchPad = HandlerBlock->getFirstNonPHI();
- for (User *U : CatchPad->users()) {
- assert(
- (!isa<InvokeInst>(U) ||
- (getParentPad(
- cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
- CatchPad)) &&
- "Expected useless pad");
- if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
- Worklist.push_back(cast<Instruction>(U));
- }
- }
- } else {
- assert(isa<CleanupPadInst>(UselessPad));
- for (User *U : UselessPad->users()) {
- assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
- assert((!isa<InvokeInst>(U) ||
- (getParentPad(
- cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
- UselessPad)) &&
- "Expected useless pad");
- if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
- Worklist.push_back(cast<Instruction>(U));
- }
- }
- }
-
- return UnwindDestToken;
-}
-
-/// When we inline a basic block into an invoke,
-/// we have to turn all of the calls that can throw into invokes.
-/// This function analyze BB to see if there are any calls, and if so,
-/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
-/// nodes in that block with the values specified in InvokeDestPHIValues.
-static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
- BasicBlock *BB, BasicBlock *UnwindEdge,
- UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *I = &*BBI++;
-
- // We only need to check for function calls: inlined invoke
- // instructions require no special handling.
- CallInst *CI = dyn_cast<CallInst>(I);
-
- if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
- continue;
-
- // We do not need to (and in fact, cannot) convert possibly throwing calls
- // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
- // invokes. The caller's "segment" of the deoptimization continuation
- // attached to the newly inlined @llvm.experimental_deoptimize
- // (resp. @llvm.experimental.guard) call should contain the exception
- // handling logic, if any.
- if (auto *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
- F->getIntrinsicID() == Intrinsic::experimental_guard)
- continue;
-
- if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
- // This call is nested inside a funclet. If that funclet has an unwind
- // destination within the inlinee, then unwinding out of this call would
- // be UB. Rewriting this call to an invoke which targets the inlined
- // invoke's unwind dest would give the call's parent funclet multiple
- // unwind destinations, which is something that subsequent EH table
- // generation can't handle and that the veirifer rejects. So when we
- // see such a call, leave it as a call.
- auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
- Value *UnwindDestToken =
- getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
- if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
- continue;
-#ifndef NDEBUG
- Instruction *MemoKey;
- if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
- MemoKey = CatchPad->getCatchSwitch();
- else
- MemoKey = FuncletPad;
- assert(FuncletUnwindMap->count(MemoKey) &&
- (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
- "must get memoized to avoid confusing later searches");
-#endif // NDEBUG
- }
-
- changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
- return BB;
- }
- return nullptr;
-}
-
-/// If we inlined an invoke site, we need to convert calls
-/// in the body of the inlined function into invokes.
-///
-/// II is the invoke instruction being inlined. FirstNewBlock is the first
-/// block of the inlined code (the last block is the end of the function),
-/// and InlineCodeInfo is information about the code that got inlined.
-static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
- ClonedCodeInfo &InlinedCodeInfo) {
- BasicBlock *InvokeDest = II->getUnwindDest();
-
- Function *Caller = FirstNewBlock->getParent();
-
- // The inlined code is currently at the end of the function, scan from the
- // start of the inlined code to its end, checking for stuff we need to
- // rewrite.
- LandingPadInliningInfo Invoke(II);
-
- // Get all of the inlined landing pad instructions.
- SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
- for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
- I != E; ++I)
- if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
- InlinedLPads.insert(II->getLandingPadInst());
-
- // Append the clauses from the outer landing pad instruction into the inlined
- // landing pad instructions.
- LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
- for (LandingPadInst *InlinedLPad : InlinedLPads) {
- unsigned OuterNum = OuterLPad->getNumClauses();
- InlinedLPad->reserveClauses(OuterNum);
- for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
- InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
- if (OuterLPad->isCleanup())
- InlinedLPad->setCleanup(true);
- }
-
- for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
- BB != E; ++BB) {
- if (InlinedCodeInfo.ContainsCalls)
- if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
- &*BB, Invoke.getOuterResumeDest()))
- // Update any PHI nodes in the exceptional block to indicate that there
- // is now a new entry in them.
- Invoke.addIncomingPHIValuesFor(NewBB);
-
- // Forward any resumes that are remaining here.
- if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI, InlinedLPads);
- }
-
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
-}
-
-/// If we inlined an invoke site, we need to convert calls
-/// in the body of the inlined function into invokes.
-///
-/// II is the invoke instruction being inlined. FirstNewBlock is the first
-/// block of the inlined code (the last block is the end of the function),
-/// and InlineCodeInfo is information about the code that got inlined.
-static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
- ClonedCodeInfo &InlinedCodeInfo) {
- BasicBlock *UnwindDest = II->getUnwindDest();
- Function *Caller = FirstNewBlock->getParent();
-
- assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
-
- // If there are PHI nodes in the unwind destination block, we need to keep
- // track of which values came into them from the invoke before removing the
- // edge from this block.
- SmallVector<Value *, 8> UnwindDestPHIValues;
- BasicBlock *InvokeBB = II->getParent();
- for (Instruction &I : *UnwindDest) {
- // Save the value to use for this edge.
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
- }
-
- // Add incoming-PHI values to the unwind destination block for the given basic
- // block, using the values for the original invoke's source block.
- auto UpdatePHINodes = [&](BasicBlock *Src) {
- BasicBlock::iterator I = UnwindDest->begin();
- for (Value *V : UnwindDestPHIValues) {
- PHINode *PHI = cast<PHINode>(I);
- PHI->addIncoming(V, Src);
- ++I;
- }
- };
-
- // This connects all the instructions which 'unwind to caller' to the invoke
- // destination.
- UnwindDestMemoTy FuncletUnwindMap;
- for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
- BB != E; ++BB) {
- if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
- if (CRI->unwindsToCaller()) {
- auto *CleanupPad = CRI->getCleanupPad();
- CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
- CRI->eraseFromParent();
- UpdatePHINodes(&*BB);
- // Finding a cleanupret with an unwind destination would confuse
- // subsequent calls to getUnwindDestToken, so map the cleanuppad
- // to short-circuit any such calls and recognize this as an "unwind
- // to caller" cleanup.
- assert(!FuncletUnwindMap.count(CleanupPad) ||
- isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
- FuncletUnwindMap[CleanupPad] =
- ConstantTokenNone::get(Caller->getContext());
- }
- }
-
- Instruction *I = BB->getFirstNonPHI();
- if (!I->isEHPad())
- continue;
-
- Instruction *Replacement = nullptr;
- if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
- if (CatchSwitch->unwindsToCaller()) {
- Value *UnwindDestToken;
- if (auto *ParentPad =
- dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
- // This catchswitch is nested inside another funclet. If that
- // funclet has an unwind destination within the inlinee, then
- // unwinding out of this catchswitch would be UB. Rewriting this
- // catchswitch to unwind to the inlined invoke's unwind dest would
- // give the parent funclet multiple unwind destinations, which is
- // something that subsequent EH table generation can't handle and
- // that the veirifer rejects. So when we see such a call, leave it
- // as "unwind to caller".
- UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
- if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
- continue;
- } else {
- // This catchswitch has no parent to inherit constraints from, and
- // none of its descendants can have an unwind edge that exits it and
- // targets another funclet in the inlinee. It may or may not have a
- // descendant that definitively has an unwind to caller. In either
- // case, we'll have to assume that any unwinds out of it may need to
- // be routed to the caller, so treat it as though it has a definitive
- // unwind to caller.
- UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
- }
- auto *NewCatchSwitch = CatchSwitchInst::Create(
- CatchSwitch->getParentPad(), UnwindDest,
- CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
- CatchSwitch);
- for (BasicBlock *PadBB : CatchSwitch->handlers())
- NewCatchSwitch->addHandler(PadBB);
- // Propagate info for the old catchswitch over to the new one in
- // the unwind map. This also serves to short-circuit any subsequent
- // checks for the unwind dest of this catchswitch, which would get
- // confused if they found the outer handler in the callee.
- FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
- Replacement = NewCatchSwitch;
- }
- } else if (!isa<FuncletPadInst>(I)) {
- llvm_unreachable("unexpected EHPad!");
- }
-
- if (Replacement) {
- Replacement->takeName(I);
- I->replaceAllUsesWith(Replacement);
- I->eraseFromParent();
- UpdatePHINodes(&*BB);
- }
- }
-
- if (InlinedCodeInfo.ContainsCalls)
- for (Function::iterator BB = FirstNewBlock->getIterator(),
- E = Caller->end();
- BB != E; ++BB)
- if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
- &*BB, UnwindDest, &FuncletUnwindMap))
- // Update any PHI nodes in the exceptional block to indicate that there
- // is now a new entry in them.
- UpdatePHINodes(NewBB);
-
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- UnwindDest->removePredecessor(InvokeBB);
-}
-
/// When inlining a call site that has !llvm.mem.parallel_loop_access or
/// llvm.access.group metadata, that metadata should be propagated to all
/// memory-accessing cloned instructions.
///
/// \param CS   The call site being inlined; its metadata attachments are the
///             source of what is propagated.
/// \param VMap Map from callee values to their clones in the caller; the
///             cloned instructions are the ones that receive the metadata.
static void PropagateParallelLoopAccessMetadata(CallSite CS,
                                                ValueToValueMapTy &VMap) {
  MDNode *M =
      CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *CallAccessGroup =
      CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
  // Neither kind of metadata on the call site: nothing to propagate.
  if (!M && !CallAccessGroup)
    return;

  // Walk every mapped value; only cloned Instructions are of interest.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (!VMI->second)
      continue;

    Instruction *NI = dyn_cast<Instruction>(VMI->second);
    if (!NI)
      continue;

    if (M) {
      if (MDNode *PM =
              NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
        // NOTE(review): M is reassigned here, so the concatenated list carries
        // over into later loop iterations; the merged list is a superset
        // either way, but confirm the accumulation is intentional.
        M = MDNode::concatenate(PM, M);
        NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
      } else if (NI->mayReadOrWriteMemory()) {
        // Clone had no list of its own: attach the call site's list directly.
        NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
      }
    }

    // Merge the call site's access group(s) into whatever groups the clone
    // already carries.
    if (NI->mayReadOrWriteMemory()) {
      MDNode *UnitedAccGroups = uniteAccessGroups(
          NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
      NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
    }
  }
}
-
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have
/// call-site-specific control dependencies).
///
/// \param CS   The call site being inlined.
/// \param VMap Map from callee values to caller-side clones; the cloned
///             instructions get the freshly cloned scope metadata.
static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
  const Function *CalledFunc = CS.getCalledFunction();
  // All scope / scope-list nodes reachable from the callee's !alias.scope and
  // !noalias attachments.
  SetVector<const MDNode *> MD;

  // Note: We could only clone the metadata if it is already used in the
  // caller. I'm omitting that check here because it might confuse
  // inter-procedural alias analysis passes. We can revisit this if it becomes
  // an efficiency or overhead problem.

  for (const BasicBlock &I : *CalledFunc)
    for (const Instruction &J : I) {
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);
    }

  if (MD.empty())
    return;

  // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
  // the set.
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
      if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
        if (MD.insert(M1))
          Queue.push_back(M1);
  }

  // Now we have a complete set of all metadata in the chains used to specify
  // the noalias scopes and the lists of those scopes.
  SmallVector<TempMDTuple, 16> DummyNodes;
  DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
  // First pass: map every old node to a fresh temporary placeholder so that
  // (possibly cyclic) references can be expressed before the real clones
  // exist.
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  for (const MDNode *I : MD) {
    SmallVector<Metadata *, 4> NewOps;
    for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
      const Metadata *V = I->getOperand(i);
      if (const MDNode *M = dyn_cast<MDNode>(V))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(V));
    }

    MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    // RAUW forwards the placeholder — and, via TrackingMDNodeRef, the MDMap
    // entry — to the real cloned node.
    TempM->replaceAllUsesWith(NewM);
  }

  // Now replace the metadata in the new inlined instructions with the
  // replacements from the map.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (!VMI->second)
      continue;

    Instruction *NI = dyn_cast<Instruction>(VMI->second);
    if (!NI)
      continue;

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had alias scope metadata (a list of scopes to
      // which instructions inside it might belong), propagate those scopes to
      // the inlined instructions.
      if (MDNode *CSM =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NewMD = MDNode::concatenate(NewMD, CSM);
      NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
    } else if (NI->mayReadOrWriteMemory()) {
      // Clone had no scope list of its own: inherit the call site's, if any.
      if (MDNode *M =
              CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
        NI->setMetadata(LLVMContext::MD_alias_scope, M);
    }

    if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
      MDNode *NewMD = MDMap[M];
      // If the call site also had noalias metadata (a list of scopes with
      // which instructions inside it don't alias), propagate those scopes to
      // the inlined instructions.
      if (MDNode *CSM =
              CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
        NewMD = MDNode::concatenate(NewMD, CSM);
      NI->setMetadata(LLVMContext::MD_noalias, NewMD);
    } else if (NI->mayReadOrWriteMemory()) {
      if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
        NI->setMetadata(LLVMContext::MD_noalias, M);
    }
  }
}
-
/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
///
/// \param CS        The call site being inlined.
/// \param VMap      Map from callee values to their caller-side clones.
/// \param DL        DataLayout used to find underlying objects of pointers.
/// \param CalleeAAR Optional alias-analysis results for the callee, used to
///                  recognize calls that only access argument pointees.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CS.getCalledFunction();
  // Collect the callee's noalias arguments that have at least one use; each
  // one gets its own alias scope below.
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (Arg.hasNoAliasAttr() && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    // Give each scope a human-readable name to aid debugging of the metadata.
    std::string Name = CalledFunc->getName();
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI)
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      // Pointers this instruction accesses (for calls: every operand that
      // might carry a pointer).
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
          if (MRB == FMRB_OnlyAccessesArgumentPointees ||
              MRB == FMRB_OnlyReadsArgumentPointees)
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // We need to check the underlying objects of all arguments, not just
          // the pointer arguments, because we might be passing pointers as
          // integers, etc.
          // However, if we know that the call only accesses pointer arguments,
          // then we only need to check the pointer arguments.
          if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, then we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      SmallSetVector<const Argument *, 4> NAPtrArgs;
      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!A->hasNoAliasAttr())
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        // If this is not some identified function-local object (which cannot
        // directly alias a noalias argument), or some other argument (which,
        // by definition, also cannot alias a noalias argument), then we could
        // alias a noalias argument that has been captured.
        if (!isa<Argument>(V) &&
            !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
          CanDeriveViaCapture = true;
      }

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        CanDeriveViaCapture = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias set, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
                                 // It might be tempting to skip the
                                 // PointerMayBeCapturedBefore check if
                                 // A->hasNoCaptureAttr() is true, but this is
                                 // incorrect because nocapture only guarantees
                                 // that no copies outlive the function, not
                                 // that the value cannot be locally captured.
                                 !PointerMayBeCapturedBefore(A,
                                     /* ReturnCaptures */ false,
                                     /* StoreCaptures */ false, I, &DT)))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}
-
-/// If the inlined function has non-byval align arguments, then
-/// add @llvm.assume-based alignment assumptions to preserve this information.
-static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
- if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
- return;
-
- AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
- auto &DL = CS.getCaller()->getParent()->getDataLayout();
-
- // To avoid inserting redundant assumptions, we should check for assumptions
- // already in the caller. To do this, we might need a DT of the caller.
- DominatorTree DT;
- bool DTCalculated = false;
-
- Function *CalledFunc = CS.getCalledFunction();
- for (Argument &Arg : CalledFunc->args()) {
- unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
- if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
- if (!DTCalculated) {
- DT.recalculate(*CS.getCaller());
- DTCalculated = true;
- }
-
- // If we can already prove the asserted alignment in the context of the
- // caller, then don't bother inserting the assumption.
- Value *ArgVal = CS.getArgument(Arg.getArgNo());
- if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
- continue;
-
- CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
- .CreateAlignmentAssumption(DL, ArgVal, Align);
- AC->registerAssumption(NewAsmp);
- }
- }
-}
-
/// Once we have cloned code over from a callee into the caller,
/// update the specified callgraph to reflect the changes we made.
/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
///
/// \param CS            The (now-inlined) call site.
/// \param FirstNewBlock First cloned block in the caller (currently unused in
///                      this function's body).
/// \param VMap          Map from callee values to their caller-side clones.
/// \param IFI           Inline bookkeeping; supplies the CallGraph and
///                      receives the list of cloned call sites.
static void UpdateCallGraphAfterInlining(CallSite CS,
                                         Function::iterator FirstNewBlock,
                                         ValueToValueMapTy &VMap,
                                         InlineFunctionInfo &IFI) {
  CallGraph &CG = *IFI.CG;
  const Function *Caller = CS.getCaller();
  const Function *Callee = CS.getCalledFunction();
  CallGraphNode *CalleeNode = CG[Callee];
  CallGraphNode *CallerNode = CG[Caller];

  // Since we inlined some uninlined call sites in the callee into the caller,
  // add edges from the caller to all of the callees of the callee.
  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();

  // Consider the case where CalleeNode == CallerNode.
  // Adding edges to CallerNode while iterating CalleeNode would then mutate
  // the sequence being walked, so iterate over a snapshot instead.
  CallGraphNode::CalledFunctionsVector CallCache;
  if (CalleeNode == CallerNode) {
    CallCache.assign(I, E);
    I = CallCache.begin();
    E = CallCache.end();
  }

  for (; I != E; ++I) {
    const Value *OrigCall = I->first;

    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    // Only copy the edge if the call was inlined!
    if (VMI == VMap.end() || VMI->second == nullptr)
      continue;

    // If the call was inlined, but then constant folded, there is no edge to
    // add.  Check for this case.
    auto *NewCall = dyn_cast<CallBase>(VMI->second);
    if (!NewCall)
      continue;

    // We do not treat intrinsic calls like real function calls because we
    // expect them to become inline code; do not add an edge for an intrinsic.
    if (NewCall->getCalledFunction() &&
        NewCall->getCalledFunction()->isIntrinsic())
      continue;

    // Remember that this call site got inlined for the client of
    // InlineFunction.
    IFI.InlinedCalls.push_back(NewCall);

    // It's possible that inlining the callsite will cause it to go from an
    // indirect to a direct call by resolving a function pointer.  If this
    // happens, set the callee of the new call site to a more precise
    // destination.  This can also happen if the call graph node of the caller
    // was just unnecessarily imprecise.
    if (!I->second->getFunction())
      if (Function *F = NewCall->getCalledFunction()) {
        // Indirect call site resolved to direct call.
        CallerNode->addCalledFunction(NewCall, CG[F]);

        continue;
      }

    CallerNode->addCalledFunction(NewCall, I->second);
  }

  // Update the call graph by deleting the edge from Callee to Caller.  We must
  // do this after the loop above in case Caller and Callee are the same.
  CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
}
-
-static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
- BasicBlock *InsertBlock,
- InlineFunctionInfo &IFI) {
- Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
- IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
-
- Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);
-}
-
-/// When inlining a call site that has a byval argument,
-/// we have to make the implicit memcpy explicit by adding it.
-static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
- const Function *CalledFunc,
- InlineFunctionInfo &IFI,
- unsigned ByValAlignment) {
- PointerType *ArgTy = cast<PointerType>(Arg->getType());
- Type *AggTy = ArgTy->getElementType();
-
- Function *Caller = TheCall->getFunction();
- const DataLayout &DL = Caller->getParent()->getDataLayout();
-
- // If the called function is readonly, then it could not mutate the caller's
- // copy of the byval'd memory. In this case, it is safe to elide the copy and
- // temporary.
- if (CalledFunc->onlyReadsMemory()) {
- // If the byval argument has a specified alignment that is greater than the
- // passed in pointer, then we either have to round up the input pointer or
- // give up on this transformation.
- if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
- return Arg;
-
- AssumptionCache *AC =
- IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
-
- // If the pointer is already known to be sufficiently aligned, or if we can
- // round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
- ByValAlignment)
- return Arg;
-
- // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
- // for code quality, but rarely happens and is required for correctness.
- }
-
- // Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align = DL.getPrefTypeAlignment(AggTy);
-
- // If the byval had an alignment specified, we *must* use at least that
- // alignment, as it is required by the byval argument (and uses of the
- // pointer inside the callee).
- Align = std::max(Align, ByValAlignment);
-
- Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
- nullptr, Align, Arg->getName(),
- &*Caller->begin()->begin());
- IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
-
- // Uses of the argument in the function should use our new alloca
- // instead.
- return NewAlloca;
-}
-
-// Check whether this Value is used by a lifetime intrinsic.
-static bool isUsedByLifetimeMarker(Value *V) {
- for (User *U : V->users())
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
- if (II->isLifetimeStartOrEnd())
- return true;
- return false;
-}
-
-// Check whether the given alloca already has
-// lifetime.start or lifetime.end intrinsics.
-static bool hasLifetimeMarkers(AllocaInst *AI) {
- Type *Ty = AI->getType();
- Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
- Ty->getPointerAddressSpace());
- if (Ty == Int8PtrTy)
- return isUsedByLifetimeMarker(AI);
-
- // Do a scan to find all the casts to i8*.
- for (User *U : AI->users()) {
- if (U->getType() != Int8PtrTy) continue;
- if (U->stripPointerCasts() != AI) continue;
- if (isUsedByLifetimeMarker(U))
- return true;
- }
- return false;
-}
-
-/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
-/// block. Allocas used in inalloca calls and allocas of dynamic array size
-/// cannot be static.
-static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
- return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
-}
-
-/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
-/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
-static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
- LLVMContext &Ctx,
- DenseMap<const MDNode *, MDNode *> &IANodes) {
- auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
- return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
- IA);
-}
-
-/// Returns the LoopID for a loop which has has been cloned from another
-/// function for inlining with the new inlined-at start and end locs.
-static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
- LLVMContext &Ctx,
- DenseMap<const MDNode *, MDNode *> &IANodes) {
- assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
- "Loop ID needs at least one operand");
- assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
- "Loop ID should refer to itself");
-
- // Save space for the self-referential LoopID.
- SmallVector<Metadata *, 4> MDs = {nullptr};
-
- for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
- Metadata *MD = OrigLoopId->getOperand(i);
- // Update the DILocations to encode the inlined-at metadata.
- if (DILocation *DL = dyn_cast<DILocation>(MD))
- MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
- else
- MDs.push_back(MD);
- }
-
- MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
- // Insert the self-referential LoopID.
- NewLoopID->replaceOperandWith(0, NewLoopID);
- return NewLoopID;
-}
-
/// Update inlined instructions' line numbers to
/// encode location where these instructions are inlined.
///
/// \param Fn                 The caller function containing the cloned code.
/// \param FI                 Iterator to the first cloned block; everything
///                           from here to Fn->end() is newly inlined code.
/// \param TheCall            The call being replaced; its debug location
///                           becomes the inlined-at location.
/// \param CalleeHasDebugInfo Whether the callee carried its own debug info;
///                           if not, location-less instructions inherit the
///                           call's location.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall, bool CalleeHasDebugInfo) {
  const DebugLoc &TheCallDL = TheCall->getDebugLoc();
  // Without a location on the call there is nothing to encode.
  if (!TheCallDL)
    return;

  auto &Ctx = Fn->getContext();
  DILocation *InlinedAtNode = TheCallDL;

  // Create a unique call site, not to be confused with any other call from the
  // same location.
  InlinedAtNode = DILocation::getDistinct(
      Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
      InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());

  // Cache the inlined-at nodes as they're built so they are reused, without
  // this every instruction's inlined-at chain would become distinct from each
  // other.
  DenseMap<const MDNode *, MDNode *> IANodes;

  for (; FI != Fn->end(); ++FI) {
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
         BI != BE; ++BI) {
      // Loop metadata needs to be updated so that the start and end locs
      // reference inlined-at locations.
      if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
        MDNode *NewLoopID =
            inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
        BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
      }

      // Instruction has its own location: rewrite it with the inlined-at
      // chain attached and move on.
      if (DebugLoc DL = BI->getDebugLoc()) {
        DebugLoc IDL =
            inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
        BI->setDebugLoc(IDL);
        continue;
      }

      if (CalleeHasDebugInfo)
        continue;

      // If the inlined instruction has no line number, make it look as if it
      // originates from the call location. This is important for
      // ((__always_inline__, __nodebug__)) functions which must use caller
      // location for all instructions in their function body.

      // Don't update static allocas, as they may get moved later.
      if (auto *AI = dyn_cast<AllocaInst>(BI))
        if (allocaWouldBeStaticInEntry(AI))
          continue;

      BI->setDebugLoc(TheCallDL);
    }
  }
}
-
/// Update the block frequencies of the caller after a callee has been inlined.
///
/// Each block cloned into the caller has its block frequency scaled by the
/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
/// callee's entry block gets the same frequency as the callsite block and the
/// relative frequencies of all cloned blocks remain the same after cloning.
static void updateCallerBFI(BasicBlock *CallSiteBlock,
                            const ValueToValueMapTy &VMap,
                            BlockFrequencyInfo *CallerBFI,
                            BlockFrequencyInfo *CalleeBFI,
                            const BasicBlock &CalleeEntryBlock) {
  SmallPtrSet<BasicBlock *, 16> ClonedBBs;
  // First, seed each cloned block with its original (callee-relative)
  // frequency.
  for (auto const &Entry : VMap) {
    // Only (callee block -> cloned block) pairs are relevant here.
    if (!isa<BasicBlock>(Entry.first) || !Entry.second)
      continue;
    auto *OrigBB = cast<BasicBlock>(Entry.first);
    auto *ClonedBB = cast<BasicBlock>(Entry.second);
    uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
    if (!ClonedBBs.insert(ClonedBB).second) {
      // Multiple blocks in the callee might get mapped to one cloned block in
      // the caller since we prune the callee as we clone it. When that happens,
      // we want to use the maximum among the original blocks' frequencies.
      uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
      if (NewFreq > Freq)
        Freq = NewFreq;
    }
    CallerBFI->setBlockFreq(ClonedBB, Freq);
  }
  // Then rescale all cloned blocks so the cloned entry block matches the
  // call site block's frequency.
  BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
  CallerBFI->setBlockFreqAndScale(
      EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
      ClonedBBs);
}
-
-/// Update the branch metadata for cloned call instructions.
-static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
- const ProfileCount &CalleeEntryCount,
- const Instruction *TheCall,
- ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *CallerBFI) {
- if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
- CalleeEntryCount.getCount() < 1)
- return;
- auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
- int64_t CallCount =
- std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
- CalleeEntryCount.getCount());
- updateProfileCallee(Callee, -CallCount, &VMap);
-}
-
/// Adjust the entry count of \p Callee by \p entryDelta (clamped at zero) and
/// rescale the profile weights of call sites accordingly. When \p VMap is
/// non-null (i.e. this runs as part of inlining), the cloned call sites in the
/// caller also receive their share of the moved count.
void llvm::updateProfileCallee(
    Function *Callee, int64_t entryDelta,
    const ValueMap<const Value *, WeakTrackingVH> *VMap) {
  auto CalleeCount = Callee->getEntryCount();
  // No profile data on the callee: nothing to update.
  if (!CalleeCount.hasValue())
    return;

  uint64_t priorEntryCount = CalleeCount.getCount();
  uint64_t newEntryCount;

  // Since CallSiteCount is an estimate, it could exceed the original callee
  // count and has to be set to 0 so guard against underflow.
  if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
    newEntryCount = 0;
  else
    newEntryCount = priorEntryCount + entryDelta;

  Callee->setEntryCount(newEntryCount);

  // A non-null VMap means we are updating as part of inlining: scale the
  // weights of the cloned call sites in the caller by the share of the count
  // that was moved out of the callee.
  if (VMap) {
    uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
    for (auto const &Entry : *VMap)
      if (isa<CallInst>(Entry.first))
        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
          CI->updateProfWeight(cloneEntryCount, priorEntryCount);
  }
  // Rescale the weights of the call sites that remain in the callee itself.
  for (BasicBlock &BB : *Callee)
    // No need to update the callsite if it is pruned during inlining.
    if (!VMap || VMap->count(&BB))
      for (Instruction &I : BB)
        if (CallInst *CI = dyn_cast<CallInst>(&I))
          CI->updateProfWeight(newEntryCount, priorEntryCount);
}
-
-/// This function inlines the called function into the basic block of the
-/// caller. This returns false if it is not possible to inline this call.
-/// The program is still in a well defined state if this occurs though.
-///
-/// Note that this only does one level of inlining. For example, if the
-/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-/// exists in the instruction stream. Similarly this will inline a recursive
-/// function by one level.
-llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR,
- bool InsertLifetime,
- Function *ForwardVarArgsTo) {
- Instruction *TheCall = CS.getInstruction();
- assert(TheCall->getParent() && TheCall->getFunction()
- && "Instruction not in function!");
-
- // FIXME: we don't inline callbr yet.
- if (isa<CallBrInst>(TheCall))
- return false;
-
- // If IFI has any state in it, zap it before we fill it in.
- IFI.reset();
-
- Function *CalledFunc = CS.getCalledFunction();
- if (!CalledFunc || // Can't inline external function or indirect
- CalledFunc->isDeclaration()) // call!
- return "external or indirect";
-
- // The inliner does not know how to inline through calls with operand bundles
- // in general ...
- if (CS.hasOperandBundles()) {
- for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
- uint32_t Tag = CS.getOperandBundleAt(i).getTagID();
- // ... but it knows how to inline through "deopt" operand bundles ...
- if (Tag == LLVMContext::OB_deopt)
- continue;
- // ... and "funclet" operand bundles.
- if (Tag == LLVMContext::OB_funclet)
- continue;
-
- return "unsupported operand bundle";
- }
- }
-
- // If the call to the callee cannot throw, set the 'nounwind' flag on any
- // calls that we inline.
- bool MarkNoUnwind = CS.doesNotThrow();
-
- BasicBlock *OrigBB = TheCall->getParent();
- Function *Caller = OrigBB->getParent();
-
- // GC poses two hazards to inlining, which only occur when the callee has GC:
- // 1. If the caller has no GC, then the callee's GC must be propagated to the
- // caller.
- // 2. If the caller has a differing GC, it is invalid to inline.
- if (CalledFunc->hasGC()) {
- if (!Caller->hasGC())
- Caller->setGC(CalledFunc->getGC());
- else if (CalledFunc->getGC() != Caller->getGC())
- return "incompatible GC";
- }
-
- // Get the personality function from the callee if it contains a landing pad.
- Constant *CalledPersonality =
- CalledFunc->hasPersonalityFn()
- ? CalledFunc->getPersonalityFn()->stripPointerCasts()
- : nullptr;
-
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- Constant *CallerPersonality =
- Caller->hasPersonalityFn()
- ? Caller->getPersonalityFn()->stripPointerCasts()
- : nullptr;
- if (CalledPersonality) {
- if (!CallerPersonality)
- Caller->setPersonalityFn(CalledPersonality);
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- else if (CalledPersonality != CallerPersonality)
- return "incompatible personality";
- }
-
- // We need to figure out which funclet the callsite was in so that we may
- // properly nest the callee.
- Instruction *CallSiteEHPad = nullptr;
- if (CallerPersonality) {
- EHPersonality Personality = classifyEHPersonality(CallerPersonality);
- if (isScopedEHPersonality(Personality)) {
- Optional<OperandBundleUse> ParentFunclet =
- CS.getOperandBundle(LLVMContext::OB_funclet);
- if (ParentFunclet)
- CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
-
- // OK, the inlining site is legal. What about the target function?
-
- if (CallSiteEHPad) {
- if (Personality == EHPersonality::MSVC_CXX) {
- // The MSVC personality cannot tolerate catches getting inlined into
- // cleanup funclets.
- if (isa<CleanupPadInst>(CallSiteEHPad)) {
- // Ok, the call site is within a cleanuppad. Let's check the callee
- // for catchpads.
- for (const BasicBlock &CalledBB : *CalledFunc) {
- if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
- return "catch in cleanup funclet";
- }
- }
- } else if (isAsynchronousEHPersonality(Personality)) {
- // SEH is even less tolerant; there may not be any sort of exceptional
- // funclet in the callee.
- for (const BasicBlock &CalledBB : *CalledFunc) {
- if (CalledBB.isEHPad())
- return "SEH in cleanup funclet";
- }
- }
- }
- }
- }
-
- // Determine if we are dealing with a call in an EHPad which does not unwind
- // to caller.
- bool EHPadForCallUnwindsLocally = false;
- if (CallSiteEHPad && CS.isCall()) {
- UnwindDestMemoTy FuncletUnwindMap;
- Value *CallSiteUnwindDestToken =
- getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
-
- EHPadForCallUnwindsLocally =
- CallSiteUnwindDestToken &&
- !isa<ConstantTokenNone>(CallSiteUnwindDestToken);
- }
-
- // Get an iterator to the last basic block in the function, which will have
- // the new function inlined after it.
- Function::iterator LastBlock = --Caller->end();
-
- // Make sure to capture all of the return instructions from the cloned
- // function.
- SmallVector<ReturnInst*, 8> Returns;
- ClonedCodeInfo InlinedFunctionInfo;
- Function::iterator FirstNewBlock;
-
- { // Scope to destroy VMap after cloning.
- ValueToValueMapTy VMap;
- // Keep a list of pair (dst, src) to emit byval initializations.
- SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
-
- auto &DL = Caller->getParent()->getDataLayout();
-
- // Calculate the vector of arguments to pass into the function cloner, which
- // matches up the formal to the actual argument values.
- CallSite::arg_iterator AI = CS.arg_begin();
- unsigned ArgNo = 0;
- for (Function::arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
- Value *ActualArg = *AI;
-
- // When byval arguments are actually inlined, we need to make the copy implied
- // by them explicit. However, we don't do this if the callee is readonly
- // or readnone, because the copy would be unneeded: the callee doesn't
- // modify the struct.
- if (CS.isByValArgument(ArgNo)) {
- ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
- CalledFunc->getParamAlignment(ArgNo));
- if (ActualArg != *AI)
- ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
- }
-
- VMap[&*I] = ActualArg;
- }
-
- // Add alignment assumptions if necessary. We do this before the inlined
- // instructions are actually cloned into the caller so that we can easily
- // check what will be known at the start of the inlined code.
- AddAlignmentAssumptions(CS, IFI);
-
- // We want the inliner to prune the code as it copies. We would LOVE to
- // have no dead or constant instructions leftover after inlining occurs
- // (which can happen, e.g., because an argument was constant), but we'll be
- // happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
- /*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, TheCall);
- // Remember the first block that is newly cloned over.
- FirstNewBlock = LastBlock; ++FirstNewBlock;
-
- if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
- // Update the BFI of blocks cloned into the caller.
- updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
- CalledFunc->front());
-
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
- IFI.PSI, IFI.CallerBFI);
-
- // Inject byval arguments initialization.
- for (std::pair<Value*, Value*> &Init : ByValInit)
- HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
- &*FirstNewBlock, IFI);
-
- Optional<OperandBundleUse> ParentDeopt =
- CS.getOperandBundle(LLVMContext::OB_deopt);
- if (ParentDeopt) {
- SmallVector<OperandBundleDef, 2> OpDefs;
-
- for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
- Instruction *I = dyn_cast_or_null<Instruction>(VH);
- if (!I) continue; // instruction was DCE'd or RAUW'ed to undef
-
- OpDefs.clear();
-
- CallSite ICS(I);
- OpDefs.reserve(ICS.getNumOperandBundles());
-
- for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) {
- auto ChildOB = ICS.getOperandBundleAt(i);
- if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
- // If the inlined call has other operand bundles, let them be
- OpDefs.emplace_back(ChildOB);
- continue;
- }
-
- // It may be useful to separate this logic (of handling operand
- // bundles) out to a separate "policy" component if this gets crowded.
- // Prepend the parent's deoptimization continuation to the newly
- // inlined call's deoptimization continuation.
- std::vector<Value *> MergedDeoptArgs;
- MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
- ChildOB.Inputs.size());
-
- MergedDeoptArgs.insert(MergedDeoptArgs.end(),
- ParentDeopt->Inputs.begin(),
- ParentDeopt->Inputs.end());
- MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(),
- ChildOB.Inputs.end());
-
- OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
- }
-
- Instruction *NewI = nullptr;
- if (isa<CallInst>(I))
- NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
- else if (isa<CallBrInst>(I))
- NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
- else
- NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
-
- // Note: the RAUW does the appropriate fixup in VMap, so we need to do
- // this even if the call returns void.
- I->replaceAllUsesWith(NewI);
-
- VH = nullptr;
- I->eraseFromParent();
- }
- }
-
- // Update the callgraph if requested.
- if (IFI.CG)
- UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
-
- // For 'nodebug' functions, the associated DISubprogram is always null.
- // Conservatively avoid propagating the callsite debug location to
- // instructions inlined from a function whose DISubprogram is not null.
- fixupLineNumbers(Caller, FirstNewBlock, TheCall,
- CalledFunc->getSubprogram() != nullptr);
-
- // Clone existing noalias metadata if necessary.
- CloneAliasScopeMetadata(CS, VMap);
-
- // Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
-
- // Propagate llvm.mem.parallel_loop_access if necessary.
- PropagateParallelLoopAccessMetadata(CS, VMap);
-
- // Register any cloned assumptions.
- if (IFI.GetAssumptionCache)
- for (BasicBlock &NewBlock :
- make_range(FirstNewBlock->getIterator(), Caller->end()))
- for (Instruction &I : NewBlock) {
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II);
- }
- }
-
- // If there are any alloca instructions in the block that used to be the entry
- // block for the callee, move them to the entry block of the caller. First
- // calculate which instruction they should be inserted before. We insert the
- // instructions at the end of the current alloca list.
- {
- BasicBlock::iterator InsertPoint = Caller->begin()->begin();
- for (BasicBlock::iterator I = FirstNewBlock->begin(),
- E = FirstNewBlock->end(); I != E; ) {
- AllocaInst *AI = dyn_cast<AllocaInst>(I++);
- if (!AI) continue;
-
- // If the alloca is now dead, remove it. This often occurs due to code
- // specialization.
- if (AI->use_empty()) {
- AI->eraseFromParent();
- continue;
- }
-
- if (!allocaWouldBeStaticInEntry(AI))
- continue;
-
- // Keep track of the static allocas that we inline into the caller.
- IFI.StaticAllocas.push_back(AI);
-
- // Scan for the block of allocas that we can move over, and move them
- // all at once.
- while (isa<AllocaInst>(I) &&
- allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
- IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
- ++I;
- }
-
- // Transfer all of the allocas over in a block. Using splice means
- // that the instructions aren't removed from the symbol table, then
- // reinserted.
- Caller->getEntryBlock().getInstList().splice(
- InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I);
- }
- // Move any dbg.declares describing the allocas into the entry basic block.
- DIBuilder DIB(*Caller->getParent());
- for (auto &AI : IFI.StaticAllocas)
- replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
- }
-
- SmallVector<Value*,4> VarArgsToForward;
- SmallVector<AttributeSet, 4> VarArgsAttrs;
- for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
- i < CS.getNumArgOperands(); i++) {
- VarArgsToForward.push_back(CS.getArgOperand(i));
- VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i));
- }
-
- bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
- if (InlinedFunctionInfo.ContainsCalls) {
- CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
- if (CallInst *CI = dyn_cast<CallInst>(TheCall))
- CallSiteTailKind = CI->getTailCallKind();
-
- // For inlining purposes, the "notail" marker is the same as no marker.
- if (CallSiteTailKind == CallInst::TCK_NoTail)
- CallSiteTailKind = CallInst::TCK_None;
-
- for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
- ++BB) {
- for (auto II = BB->begin(); II != BB->end();) {
- Instruction &I = *II++;
- CallInst *CI = dyn_cast<CallInst>(&I);
- if (!CI)
- continue;
-
- // Forward varargs from inlined call site to calls to the
- // ForwardVarArgsTo function, if requested, and to musttail calls.
- if (!VarArgsToForward.empty() &&
- ((ForwardVarArgsTo &&
- CI->getCalledFunction() == ForwardVarArgsTo) ||
- CI->isMustTailCall())) {
- // Collect attributes for non-vararg parameters.
- AttributeList Attrs = CI->getAttributes();
- SmallVector<AttributeSet, 8> ArgAttrs;
- if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
- for (unsigned ArgNo = 0;
- ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
- ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
- }
-
- // Add VarArg attributes.
- ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
- Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), ArgAttrs);
- // Add VarArgs to existing parameters.
- SmallVector<Value *, 6> Params(CI->arg_operands());
- Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
- CallInst *NewCI = CallInst::Create(
- CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
- NewCI->setDebugLoc(CI->getDebugLoc());
- NewCI->setAttributes(Attrs);
- NewCI->setCallingConv(CI->getCallingConv());
- CI->replaceAllUsesWith(NewCI);
- CI->eraseFromParent();
- CI = NewCI;
- }
-
- if (Function *F = CI->getCalledFunction())
- InlinedDeoptimizeCalls |=
- F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
-
- // We need to reduce the strength of any inlined tail calls. For
- // musttail, we have to avoid introducing potential unbounded stack
- // growth. For example, if functions 'f' and 'g' are mutually recursive
- // with musttail, we can inline 'g' into 'f' so long as we preserve
- // musttail on the cloned call to 'f'. If either the inlined call site
- // or the cloned call site is *not* musttail, the program already has
- // one frame of stack growth, so it's safe to remove musttail. Here is
- // a table of example transformations:
- //
- // f -> musttail g -> musttail f ==> f -> musttail f
- // f -> musttail g -> tail f ==> f -> tail f
- // f -> g -> musttail f ==> f -> f
- // f -> g -> tail f ==> f -> f
- //
- // Inlined notail calls should remain notail calls.
- CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
- if (ChildTCK != CallInst::TCK_NoTail)
- ChildTCK = std::min(CallSiteTailKind, ChildTCK);
- CI->setTailCallKind(ChildTCK);
- InlinedMustTailCalls |= CI->isMustTailCall();
-
- // Calls inlined through a 'nounwind' call site should be marked
- // 'nounwind'.
- if (MarkNoUnwind)
- CI->setDoesNotThrow();
- }
- }
- }
-
- // Leave lifetime markers for the static alloca's, scoping them to the
- // function we just inlined.
- if (InsertLifetime && !IFI.StaticAllocas.empty()) {
- IRBuilder<> builder(&FirstNewBlock->front());
- for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
- AllocaInst *AI = IFI.StaticAllocas[ai];
- // Don't mark swifterror allocas. They can't have bitcast uses.
- if (AI->isSwiftError())
- continue;
-
- // If the alloca is already scoped to something smaller than the whole
- // function then there's no need to add redundant, less accurate markers.
- if (hasLifetimeMarkers(AI))
- continue;
-
- // Try to determine the size of the allocation.
- ConstantInt *AllocaSize = nullptr;
- if (ConstantInt *AIArraySize =
- dyn_cast<ConstantInt>(AI->getArraySize())) {
- auto &DL = Caller->getParent()->getDataLayout();
- Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
- uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
-
- // Don't add markers for zero-sized allocas.
- if (AllocaArraySize == 0)
- continue;
-
- // Check that array size doesn't saturate uint64_t and doesn't
- // overflow when it's multiplied by type size.
- if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
- std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
- AllocaTypeSize) {
- AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
- AllocaArraySize * AllocaTypeSize);
- }
- }
-
- builder.CreateLifetimeStart(AI, AllocaSize);
- for (ReturnInst *RI : Returns) {
- // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
- // call and a return. The return kills all local allocas.
- if (InlinedMustTailCalls &&
- RI->getParent()->getTerminatingMustTailCall())
- continue;
- if (InlinedDeoptimizeCalls &&
- RI->getParent()->getTerminatingDeoptimizeCall())
- continue;
- IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
- }
- }
- }
-
- // If the inlined code contained dynamic alloca instructions, wrap the inlined
- // code with llvm.stacksave/llvm.stackrestore intrinsics.
- if (InlinedFunctionInfo.ContainsDynamicAllocas) {
- Module *M = Caller->getParent();
- // Get the two intrinsics we care about.
- Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
- Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
-
- // Insert the llvm.stacksave.
- CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
- .CreateCall(StackSave, {}, "savedstack");
-
- // Insert a call to llvm.stackrestore before any return instructions in the
- // inlined function.
- for (ReturnInst *RI : Returns) {
- // Don't insert llvm.stackrestore calls between a musttail or deoptimize
- // call and a return. The return will restore the stack pointer.
- if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
- continue;
- if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
- continue;
- IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
- }
- }
-
- // If we are inlining for an invoke instruction, we must make sure to rewrite
- // any call instructions into invoke instructions. This is sensitive to which
- // funclet pads were top-level in the inlinee, so must be done before
- // rewriting the "parent pad" links.
- if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
- BasicBlock *UnwindDest = II->getUnwindDest();
- Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
- if (isa<LandingPadInst>(FirstNonPHI)) {
- HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
- } else {
- HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
- }
- }
-
- // Update the lexical scopes of the new funclets and callsites.
- // Anything that had 'none' as its parent is now nested inside the callsite's
- // EHPad.
-
- if (CallSiteEHPad) {
- for (Function::iterator BB = FirstNewBlock->getIterator(),
- E = Caller->end();
- BB != E; ++BB) {
- // Add bundle operands to any top-level call sites.
- SmallVector<OperandBundleDef, 1> OpBundles;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
- Instruction *I = &*BBI++;
- CallSite CS(I);
- if (!CS)
- continue;
-
- // Skip call sites which are nounwind intrinsics.
- auto *CalledFn =
- dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
- continue;
-
- // Skip call sites which already have a "funclet" bundle.
- if (CS.getOperandBundle(LLVMContext::OB_funclet))
- continue;
-
- CS.getOperandBundlesAsDefs(OpBundles);
- OpBundles.emplace_back("funclet", CallSiteEHPad);
-
- Instruction *NewInst;
- if (CS.isCall())
- NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
- else if (CS.isCallBr())
- NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
- else
- NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
- NewInst->takeName(I);
- I->replaceAllUsesWith(NewInst);
- I->eraseFromParent();
-
- OpBundles.clear();
- }
-
- // It is problematic if the inlinee has a cleanupret which unwinds to
- // caller and we inline it into a call site which doesn't unwind but into
- // an EH pad that does. Such an edge must be dynamically unreachable.
- // As such, we replace the cleanupret with unreachable.
- if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
- if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
- changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
-
- Instruction *I = BB->getFirstNonPHI();
- if (!I->isEHPad())
- continue;
-
- if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
- if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
- CatchSwitch->setParentPad(CallSiteEHPad);
- } else {
- auto *FPI = cast<FuncletPadInst>(I);
- if (isa<ConstantTokenNone>(FPI->getParentPad()))
- FPI->setParentPad(CallSiteEHPad);
- }
- }
- }
-
- if (InlinedDeoptimizeCalls) {
- // We need to at least remove the deoptimizing returns from the Return set,
- // so that the control flow from those returns does not get merged into the
- // caller (but terminate it instead). If the caller's return type does not
- // match the callee's return type, we also need to change the return type of
- // the intrinsic.
- if (Caller->getReturnType() == TheCall->getType()) {
- auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) {
- return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
- });
- Returns.erase(NewEnd, Returns.end());
- } else {
- SmallVector<ReturnInst *, 8> NormalReturns;
- Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
- Caller->getParent(), Intrinsic::experimental_deoptimize,
- {Caller->getReturnType()});
-
- for (ReturnInst *RI : Returns) {
- CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
- if (!DeoptCall) {
- NormalReturns.push_back(RI);
- continue;
- }
-
- // The calling convention on the deoptimize call itself may be bogus,
- // since the code we're inlining may have undefined behavior (and may
- // never actually execute at runtime); but all
- // @llvm.experimental.deoptimize declarations have to have the same
- // calling convention in a well-formed module.
- auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
- NewDeoptIntrinsic->setCallingConv(CallingConv);
- auto *CurBB = RI->getParent();
- RI->eraseFromParent();
-
- SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(),
- DeoptCall->arg_end());
-
- SmallVector<OperandBundleDef, 1> OpBundles;
- DeoptCall->getOperandBundlesAsDefs(OpBundles);
- DeoptCall->eraseFromParent();
- assert(!OpBundles.empty() &&
- "Expected at least the deopt operand bundle");
-
- IRBuilder<> Builder(CurBB);
- CallInst *NewDeoptCall =
- Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
- NewDeoptCall->setCallingConv(CallingConv);
- if (NewDeoptCall->getType()->isVoidTy())
- Builder.CreateRetVoid();
- else
- Builder.CreateRet(NewDeoptCall);
- }
-
- // Leave behind the normal returns so we can merge control flow.
- std::swap(Returns, NormalReturns);
- }
- }
-
- // Handle any inlined musttail call sites. In order for a new call site to be
- // musttail, the source of the clone and the inlined call site must have been
- // musttail. Therefore it's safe to return without merging control into the
- // phi below.
- if (InlinedMustTailCalls) {
- // Check if we need to bitcast the result of any musttail calls.
- Type *NewRetTy = Caller->getReturnType();
- bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
-
- // Handle the returns preceded by musttail calls separately.
- SmallVector<ReturnInst *, 8> NormalReturns;
- for (ReturnInst *RI : Returns) {
- CallInst *ReturnedMustTail =
- RI->getParent()->getTerminatingMustTailCall();
- if (!ReturnedMustTail) {
- NormalReturns.push_back(RI);
- continue;
- }
- if (!NeedBitCast)
- continue;
-
- // Delete the old return and any preceding bitcast.
- BasicBlock *CurBB = RI->getParent();
- auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
- RI->eraseFromParent();
- if (OldCast)
- OldCast->eraseFromParent();
-
- // Insert a new bitcast and return with the right type.
- IRBuilder<> Builder(CurBB);
- Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
- }
-
- // Leave behind the normal returns so we can merge control flow.
- std::swap(Returns, NormalReturns);
- }
-
- // Now that all of the transforms on the inlined code have taken place but
- // before we splice the inlined code into the CFG and lose track of which
- // blocks were actually inlined, collect the call sites. We only do this if
- // call graph updates weren't requested, as those provide value handle based
- // tracking of inlined call sites instead.
- if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
- // Otherwise just collect the raw call sites that were inlined.
- for (BasicBlock &NewBB :
- make_range(FirstNewBlock->getIterator(), Caller->end()))
- for (Instruction &I : NewBB)
- if (auto CS = CallSite(&I))
- IFI.InlinedCallSites.push_back(CS);
- }
-
- // If we cloned in _exactly one_ basic block, and if that block ends in a
- // return instruction, we splice the body of the inlined callee directly into
- // the calling basic block.
- if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
- // Move all of the instructions right before the call.
- OrigBB->getInstList().splice(TheCall->getIterator(),
- FirstNewBlock->getInstList(),
- FirstNewBlock->begin(), FirstNewBlock->end());
- // Remove the cloned basic block.
- Caller->getBasicBlockList().pop_back();
-
- // If the call site was an invoke instruction, add a branch to the normal
- // destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
- NewBr->setDebugLoc(Returns[0]->getDebugLoc());
- }
-
- // If the return instruction returned a value, replace uses of the call with
- // uses of the returned value.
- if (!TheCall->use_empty()) {
- ReturnInst *R = Returns[0];
- if (TheCall == R->getReturnValue())
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
- else
- TheCall->replaceAllUsesWith(R->getReturnValue());
- }
- // Since we are now done with the Call/Invoke, we can delete it.
- TheCall->eraseFromParent();
-
- // Since we are now done with the return instruction, delete it also.
- Returns[0]->eraseFromParent();
-
- // We are now done with the inlining.
- return true;
- }
-
- // Otherwise, we have the normal case, of more than one block to inline or
- // multiple return sites.
-
- // We want to clone the entire callee function into the hole between the
- // "starter" and "ender" blocks. How we accomplish this depends on whether
- // this is an invoke instruction or a call instruction.
- BasicBlock *AfterCallBB;
- BranchInst *CreatedBranchToNormalDest = nullptr;
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
-
- // Add an unconditional branch to make this look like the CallInst case...
- CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
-
- // Split the basic block. This guarantees that no PHI nodes will have to be
- // updated due to new incoming edges, and make the invoke case more
- // symmetric to the call case.
- AfterCallBB =
- OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
- CalledFunc->getName() + ".exit");
-
- } else { // It's a call
- // If this is a call instruction, we need to split the basic block that
- // the call lives in.
- //
- AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(),
- CalledFunc->getName() + ".exit");
- }
-
- if (IFI.CallerBFI) {
- // Copy original BB's block frequency to AfterCallBB
- IFI.CallerBFI->setBlockFreq(
- AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
- }
-
- // Change the branch that used to go to AfterCallBB to branch to the first
- // basic block of the inlined function.
- //
- Instruction *Br = OrigBB->getTerminator();
- assert(Br && Br->getOpcode() == Instruction::Br &&
- "splitBasicBlock broken!");
- Br->setOperand(0, &*FirstNewBlock);
-
- // Now that the function is correct, make it a little bit nicer. In
- // particular, move the basic blocks inserted from the end of the function
- // into the space made by splitting the source basic block.
- Caller->getBasicBlockList().splice(AfterCallBB->getIterator(),
- Caller->getBasicBlockList(), FirstNewBlock,
- Caller->end());
-
- // Handle all of the return instructions that we just cloned in, and eliminate
- // any users of the original call/invoke instruction.
- Type *RTy = CalledFunc->getReturnType();
-
- PHINode *PHI = nullptr;
- if (Returns.size() > 1) {
- // The PHI node should go at the front of the new basic block to merge all
- // possible incoming values.
- if (!TheCall->use_empty()) {
- PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
- &AfterCallBB->front());
- // Anything that used the result of the function call should now use the
- // PHI node as their operand.
- TheCall->replaceAllUsesWith(PHI);
- }
-
- // Loop over all of the return instructions adding entries to the PHI node
- // as appropriate.
- if (PHI) {
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- ReturnInst *RI = Returns[i];
- assert(RI->getReturnValue()->getType() == PHI->getType() &&
- "Ret value not consistent in function!");
- PHI->addIncoming(RI->getReturnValue(), RI->getParent());
- }
- }
-
- // Add a branch to the merge points and remove return instructions.
- DebugLoc Loc;
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- ReturnInst *RI = Returns[i];
- BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
- Loc = RI->getDebugLoc();
- BI->setDebugLoc(Loc);
- RI->eraseFromParent();
- }
- // We need to set the debug location to *somewhere* inside the
- // inlined function. The line number may be nonsensical, but the
- // instruction will at least be associated with the right
- // function.
- if (CreatedBranchToNormalDest)
- CreatedBranchToNormalDest->setDebugLoc(Loc);
- } else if (!Returns.empty()) {
- // Otherwise, if there is exactly one return value, just replace anything
- // using the return value of the call with the computed value.
- if (!TheCall->use_empty()) {
- if (TheCall == Returns[0]->getReturnValue())
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
- else
- TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
- }
-
- // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
- BasicBlock *ReturnBB = Returns[0]->getParent();
- ReturnBB->replaceAllUsesWith(AfterCallBB);
-
- // Splice the code from the return block into the block that it will return
- // to, which contains the code that was after the call.
- AfterCallBB->getInstList().splice(AfterCallBB->begin(),
- ReturnBB->getInstList());
-
- if (CreatedBranchToNormalDest)
- CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
-
- // Delete the return instruction now and empty ReturnBB now.
- Returns[0]->eraseFromParent();
- ReturnBB->eraseFromParent();
- } else if (!TheCall->use_empty()) {
- // No returns, but something is using the return value of the call. Just
- // nuke the result.
- TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
- }
-
- // Since we are now done with the Call/Invoke, we can delete it.
- TheCall->eraseFromParent();
-
- // If we inlined any musttail calls and the original return is now
- // unreachable, delete it. It can only contain a bitcast and ret.
- if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
- AfterCallBB->eraseFromParent();
-
- // We should always be able to fold the entry block of the function into the
- // single predecessor of the block...
- assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
- BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
-
- // Splice the code entry block into calling block, right before the
- // unconditional branch.
- CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
- OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList());
-
- // Remove the unconditional branch.
- OrigBB->getInstList().erase(Br);
-
- // Now we can remove the CalleeEntry block, which is now empty.
- Caller->getBasicBlockList().erase(CalleeEntry);
-
- // If we inserted a phi node, check to see if it has a single value (e.g. all
- // the entries are the same or undef). If so, remove the PHI so it doesn't
- // block other optimizations.
- if (PHI) {
- AssumptionCache *AC =
- IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
- auto &DL = Caller->getParent()->getDataLayout();
- if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
- PHI->replaceAllUsesWith(V);
- PHI->eraseFromParent();
- }
- }
-
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
deleted file mode 100644
index 6c4fc1ceb991..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a little utility pass that gives instructions names; this is mostly
-// useful when diffing the effect of an optimization because deleting an
-// unnamed instruction can change all other instruction numbering, making the
-// diff very noisy.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-namespace {
- struct InstNamer : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {
- initializeInstNamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {
- Info.setPreservesAll();
- }
-
- bool runOnFunction(Function &F) override {
- for (auto &Arg : F.args())
- if (!Arg.hasName())
- Arg.setName("arg");
-
- for (BasicBlock &BB : F) {
- if (!BB.hasName())
- BB.setName("bb");
-
- for (Instruction &I : BB)
- if (!I.hasName() && !I.getType()->isVoidTy())
- I.setName("tmp");
- }
- return true;
- }
- };
-
- char InstNamer::ID = 0;
-}
-
-INITIALIZE_PASS(InstNamer, "instnamer",
- "Assign names to anonymous instructions", false, false)
-char &llvm::InstructionNamerID = InstNamer::ID;
-//===----------------------------------------------------------------------===//
-//
-// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
-//
-FunctionPass *llvm::createInstructionNamerPass() {
- return new InstNamer();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
deleted file mode 100644
index 9082049c82da..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ /dev/null
@@ -1,673 +0,0 @@
-//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains an implementation of 32bit and 64bit scalar integer
-// division for targets that don't have native support. It's largely derived
-// from compiler-rt's implementations of __udivsi3 and __udivmoddi4,
-// but hand-tuned for targets that prefer less control flow.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/IntegerDivision.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "integer-division"
-
-/// Generate code to compute the remainder of two signed integers. Returns the
-/// remainder, which will have the sign of the dividend. Builder's insert point
-/// should be pointing where the caller wants code generated, e.g. at the srem
-/// instruction. This will generate a urem in the process, and Builder's insert
-/// point will be pointing at the urem (if present, i.e. not folded), ready to
-/// be expanded if the user wishes
-static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
- IRBuilder<> &Builder) {
- unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
- ConstantInt *Shift;
-
- if (BitWidth == 64) {
- Shift = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Shift = Builder.getInt32(31);
- }
-
- // Following instructions are generated for both i32 (shift 31) and
- // i64 (shift 63).
-
- // ; %dividend_sgn = ashr i32 %dividend, 31
- // ; %divisor_sgn = ashr i32 %divisor, 31
- // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
- // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
- // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
- // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
- // ; %urem = urem i32 %dividend, %divisor
- // ; %xored = xor i32 %urem, %dividend_sgn
- // ; %srem = sub i32 %xored, %dividend_sgn
- Value *DividendSign = Builder.CreateAShr(Dividend, Shift);
- Value *DivisorSign = Builder.CreateAShr(Divisor, Shift);
- Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
- Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
- Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
- Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
- Value *URem = Builder.CreateURem(UDividend, UDivisor);
- Value *Xored = Builder.CreateXor(URem, DividendSign);
- Value *SRem = Builder.CreateSub(Xored, DividendSign);
-
- if (Instruction *URemInst = dyn_cast<Instruction>(URem))
- Builder.SetInsertPoint(URemInst);
-
- return SRem;
-}
-
-
-/// Generate code to compute the remainder of two unsigned integers. Returns the
-/// remainder. Builder's insert point should be pointing where the caller wants
-/// code generated, e.g. at the urem instruction. This will generate a udiv in
-/// the process, and Builder's insert point will be pointing at the udiv (if
-/// present, i.e. not folded), ready to be expanded if the user wishes
-static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
- IRBuilder<> &Builder) {
- // Remainder = Dividend - Quotient*Divisor
-
- // Following instructions are generated for both i32 and i64
-
- // ; %quotient = udiv i32 %dividend, %divisor
- // ; %product = mul i32 %divisor, %quotient
- // ; %remainder = sub i32 %dividend, %product
- Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
- Value *Product = Builder.CreateMul(Divisor, Quotient);
- Value *Remainder = Builder.CreateSub(Dividend, Product);
-
- if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
- Builder.SetInsertPoint(UDiv);
-
- return Remainder;
-}
-
-/// Generate code to divide two signed integers. Returns the quotient, rounded
-/// towards 0. Builder's insert point should be pointing where the caller wants
-/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
-/// the process, and Builder's insert point will be pointing at the udiv (if
-/// present, i.e. not folded), ready to be expanded if the user wishes.
-static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
- IRBuilder<> &Builder) {
- // Implementation taken from compiler-rt's __divsi3 and __divdi3
-
- unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
- ConstantInt *Shift;
-
- if (BitWidth == 64) {
- Shift = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Shift = Builder.getInt32(31);
- }
-
- // Following instructions are generated for both i32 (shift 31) and
- // i64 (shift 63).
-
- // ; %tmp = ashr i32 %dividend, 31
- // ; %tmp1 = ashr i32 %divisor, 31
- // ; %tmp2 = xor i32 %tmp, %dividend
- // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
- // ; %tmp3 = xor i32 %tmp1, %divisor
- // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
- // ; %q_sgn = xor i32 %tmp1, %tmp
- // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
- // ; %tmp4 = xor i32 %q_mag, %q_sgn
- // ; %q = sub i32 %tmp4, %q_sgn
- Value *Tmp = Builder.CreateAShr(Dividend, Shift);
- Value *Tmp1 = Builder.CreateAShr(Divisor, Shift);
- Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
- Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
- Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
- Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
- Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
- Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
- Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
- Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
-
- if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
- Builder.SetInsertPoint(UDiv);
-
- return Q;
-}
-
-/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers.
-/// Returns the quotient, rounded towards 0. Builder's insert point should
-/// point where the caller wants code generated, e.g. at the udiv instruction.
-static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
- IRBuilder<> &Builder) {
- // The basic algorithm can be found in the compiler-rt project's
- // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
- // that's been hand-tuned to lessen the amount of control flow involved.
-
- // Some helper values
- IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
- unsigned BitWidth = DivTy->getBitWidth();
-
- ConstantInt *Zero;
- ConstantInt *One;
- ConstantInt *NegOne;
- ConstantInt *MSB;
-
- if (BitWidth == 64) {
- Zero = Builder.getInt64(0);
- One = Builder.getInt64(1);
- NegOne = ConstantInt::getSigned(DivTy, -1);
- MSB = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Zero = Builder.getInt32(0);
- One = Builder.getInt32(1);
- NegOne = ConstantInt::getSigned(DivTy, -1);
- MSB = Builder.getInt32(31);
- }
-
- ConstantInt *True = Builder.getTrue();
-
- BasicBlock *IBB = Builder.GetInsertBlock();
- Function *F = IBB->getParent();
- Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
- DivTy);
-
- // Our CFG is going to look like:
- // +---------------------+
- // | special-cases |
- // | ... |
- // +---------------------+
- // | |
- // | +----------+
- // | | bb1 |
- // | | ... |
- // | +----------+
- // | | |
- // | | +------------+
- // | | | preheader |
- // | | | ... |
- // | | +------------+
- // | | |
- // | | | +---+
- // | | | | |
- // | | +------------+ |
- // | | | do-while | |
- // | | | ... | |
- // | | +------------+ |
- // | | | | |
- // | +-----------+ +---+
- // | | loop-exit |
- // | | ... |
- // | +-----------+
- // | |
- // +-------+
- // | ... |
- // | end |
- // +-------+
- BasicBlock *SpecialCases = Builder.GetInsertBlock();
- SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
- BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
- "udiv-end");
- BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
- "udiv-loop-exit", F, End);
- BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
- "udiv-do-while", F, End);
- BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
- "udiv-preheader", F, End);
- BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
- "udiv-bb1", F, End);
-
- // We'll be overwriting the terminator to insert our extra blocks
- SpecialCases->getTerminator()->eraseFromParent();
-
- // Same instructions are generated for both i32 (msb 31) and i64 (msb 63).
-
- // First off, check for special cases: dividend or divisor is zero, divisor
- // is greater than dividend, and divisor is 1.
- // ; special-cases:
- // ; %ret0_1 = icmp eq i32 %divisor, 0
- // ; %ret0_2 = icmp eq i32 %dividend, 0
- // ; %ret0_3 = or i1 %ret0_1, %ret0_2
- // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
- // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
- // ; %sr = sub nsw i32 %tmp0, %tmp1
- // ; %ret0_4 = icmp ugt i32 %sr, 31
- // ; %ret0 = or i1 %ret0_3, %ret0_4
- // ; %retDividend = icmp eq i32 %sr, 31
- // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
- // ; %earlyRet = or i1 %ret0, %retDividend
- // ; br i1 %earlyRet, label %end, label %bb1
- Builder.SetInsertPoint(SpecialCases);
- Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
- Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
- Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
- Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True});
- Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
- Value *SR = Builder.CreateSub(Tmp0, Tmp1);
- Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
- Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
- Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
- Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
- Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
- Builder.CreateCondBr(EarlyRet, End, BB1);
-
- // ; bb1: ; preds = %special-cases
- // ; %sr_1 = add i32 %sr, 1
- // ; %tmp2 = sub i32 31, %sr
- // ; %q = shl i32 %dividend, %tmp2
- // ; %skipLoop = icmp eq i32 %sr_1, 0
- // ; br i1 %skipLoop, label %loop-exit, label %preheader
- Builder.SetInsertPoint(BB1);
- Value *SR_1 = Builder.CreateAdd(SR, One);
- Value *Tmp2 = Builder.CreateSub(MSB, SR);
- Value *Q = Builder.CreateShl(Dividend, Tmp2);
- Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
- Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
-
- // ; preheader: ; preds = %bb1
- // ; %tmp3 = lshr i32 %dividend, %sr_1
- // ; %tmp4 = add i32 %divisor, -1
- // ; br label %do-while
- Builder.SetInsertPoint(Preheader);
- Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
- Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
- Builder.CreateBr(DoWhile);
-
- // ; do-while: ; preds = %do-while, %preheader
- // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
- // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
- // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
- // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
- // ; %tmp5 = shl i32 %r_1, 1
- // ; %tmp6 = lshr i32 %q_2, 31
- // ; %tmp7 = or i32 %tmp5, %tmp6
- // ; %tmp8 = shl i32 %q_2, 1
- // ; %q_1 = or i32 %carry_1, %tmp8
- // ; %tmp9 = sub i32 %tmp4, %tmp7
- // ; %tmp10 = ashr i32 %tmp9, 31
- // ; %carry = and i32 %tmp10, 1
- // ; %tmp11 = and i32 %tmp10, %divisor
- // ; %r = sub i32 %tmp7, %tmp11
- // ; %sr_2 = add i32 %sr_3, -1
- // ; %tmp12 = icmp eq i32 %sr_2, 0
- // ; br i1 %tmp12, label %loop-exit, label %do-while
- Builder.SetInsertPoint(DoWhile);
- PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2);
- PHINode *SR_3 = Builder.CreatePHI(DivTy, 2);
- PHINode *R_1 = Builder.CreatePHI(DivTy, 2);
- PHINode *Q_2 = Builder.CreatePHI(DivTy, 2);
- Value *Tmp5 = Builder.CreateShl(R_1, One);
- Value *Tmp6 = Builder.CreateLShr(Q_2, MSB);
- Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
- Value *Tmp8 = Builder.CreateShl(Q_2, One);
- Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
- Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
- Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB);
- Value *Carry = Builder.CreateAnd(Tmp10, One);
- Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
- Value *R = Builder.CreateSub(Tmp7, Tmp11);
- Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
- Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
- Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
-
- // ; loop-exit: ; preds = %do-while, %bb1
- // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
- // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
- // ; %tmp13 = shl i32 %q_3, 1
- // ; %q_4 = or i32 %carry_2, %tmp13
- // ; br label %end
- Builder.SetInsertPoint(LoopExit);
- PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2);
- PHINode *Q_3 = Builder.CreatePHI(DivTy, 2);
- Value *Tmp13 = Builder.CreateShl(Q_3, One);
- Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
- Builder.CreateBr(End);
-
- // ; end: ; preds = %loop-exit, %special-cases
- // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
- // ; ret i32 %q_5
- Builder.SetInsertPoint(End, End->begin());
- PHINode *Q_5 = Builder.CreatePHI(DivTy, 2);
-
- // Populate the Phis, since all values have now been created. Our Phis were:
- // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
- Carry_1->addIncoming(Zero, Preheader);
- Carry_1->addIncoming(Carry, DoWhile);
- // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
- SR_3->addIncoming(SR_1, Preheader);
- SR_3->addIncoming(SR_2, DoWhile);
- // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
- R_1->addIncoming(Tmp3, Preheader);
- R_1->addIncoming(R, DoWhile);
- // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
- Q_2->addIncoming(Q, Preheader);
- Q_2->addIncoming(Q_1, DoWhile);
- // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
- Carry_2->addIncoming(Zero, BB1);
- Carry_2->addIncoming(Carry, DoWhile);
- // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
- Q_3->addIncoming(Q, BB1);
- Q_3->addIncoming(Q_1, DoWhile);
- // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
- Q_5->addIncoming(Q_4, LoopExit);
- Q_5->addIncoming(RetVal, SpecialCases);
-
- return Q_5;
-}
-
-/// Generate code to calculate the remainder of two integers, replacing Rem with
-/// the generated code. This currently generates code using the udiv expansion,
-/// but future work includes generating more specialized code, e.g. when more
-/// information about the operands are known. Implements both 32bit and 64bit
-/// scalar division.
-///
-/// Replace Rem with generated code.
-bool llvm::expandRemainder(BinaryOperator *Rem) {
- assert((Rem->getOpcode() == Instruction::SRem ||
- Rem->getOpcode() == Instruction::URem) &&
- "Trying to expand remainder from a non-remainder function");
-
- IRBuilder<> Builder(Rem);
-
- assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
- assert((Rem->getType()->getIntegerBitWidth() == 32 ||
- Rem->getType()->getIntegerBitWidth() == 64) &&
- "Div of bitwidth other than 32 or 64 not supported");
-
- // First prepare the sign if it's a signed remainder
- if (Rem->getOpcode() == Instruction::SRem) {
- Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
- Rem->getOperand(1), Builder);
-
- // Check whether this is the insert point while Rem is still valid.
- bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint();
- Rem->replaceAllUsesWith(Remainder);
- Rem->dropAllReferences();
- Rem->eraseFromParent();
-
- // If we didn't actually generate an urem instruction, we're done
- // This happens for example if the input were constant. In this case the
- // Builder insertion point was unchanged
- if (IsInsertPoint)
- return true;
-
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
- Rem = BO;
- }
-
- Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
- Rem->getOperand(1),
- Builder);
-
- Rem->replaceAllUsesWith(Remainder);
- Rem->dropAllReferences();
- Rem->eraseFromParent();
-
- // Expand the udiv
- if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
- assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
- expandDivision(UDiv);
- }
-
- return true;
-}
-
-
-/// Generate code to divide two integers, replacing Div with the generated
-/// code. This currently generates code similarly to compiler-rt's
-/// implementations, but future work includes generating more specialized code
-/// when more information about the operands are known. Implements both
-/// 32bit and 64bit scalar division.
-///
-/// Replace Div with generated code.
-bool llvm::expandDivision(BinaryOperator *Div) {
- assert((Div->getOpcode() == Instruction::SDiv ||
- Div->getOpcode() == Instruction::UDiv) &&
- "Trying to expand division from a non-division function");
-
- IRBuilder<> Builder(Div);
-
- assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
- assert((Div->getType()->getIntegerBitWidth() == 32 ||
- Div->getType()->getIntegerBitWidth() == 64) &&
- "Div of bitwidth other than 32 or 64 not supported");
-
- // First prepare the sign if it's a signed division
- if (Div->getOpcode() == Instruction::SDiv) {
- // Lower the code to unsigned division, and reset Div to point to the udiv.
- Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
- Div->getOperand(1), Builder);
-
- // Check whether this is the insert point while Div is still valid.
- bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint();
- Div->replaceAllUsesWith(Quotient);
- Div->dropAllReferences();
- Div->eraseFromParent();
-
- // If we didn't actually generate an udiv instruction, we're done
- // This happens for example if the input were constant. In this case the
- // Builder insertion point was unchanged
- if (IsInsertPoint)
- return true;
-
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
- Div = BO;
- }
-
- // Insert the unsigned division code
- Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
- Div->getOperand(1),
- Builder);
- Div->replaceAllUsesWith(Quotient);
- Div->dropAllReferences();
- Div->eraseFromParent();
-
- return true;
-}
-
-/// Generate code to compute the remainder of two integers of bitwidth up to
-/// 32 bits. Uses the above routines and extends the inputs/truncates the
-/// outputs to operate in 32 bits; that is, these routines are good for targets
-/// that have no or very little support for smaller than 32 bit integer
-/// arithmetic.
-///
-/// Replace Rem with emulation code.
-bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
- assert((Rem->getOpcode() == Instruction::SRem ||
- Rem->getOpcode() == Instruction::URem) &&
- "Trying to expand remainder from a non-remainder function");
-
- Type *RemTy = Rem->getType();
- assert(!RemTy->isVectorTy() && "Div over vectors not supported");
-
- unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
-
- assert(RemTyBitWidth <= 32 &&
- "Div of bitwidth greater than 32 not supported");
-
- if (RemTyBitWidth == 32)
- return expandRemainder(Rem);
-
- // If bitwidth smaller than 32 extend inputs, extend output and proceed
- // with 32 bit division.
- IRBuilder<> Builder(Rem);
-
- Value *ExtDividend;
- Value *ExtDivisor;
- Value *ExtRem;
- Value *Trunc;
- Type *Int32Ty = Builder.getInt32Ty();
-
- if (Rem->getOpcode() == Instruction::SRem) {
- ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
- ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
- ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
- } else {
- ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
- ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
- ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
- }
- Trunc = Builder.CreateTrunc(ExtRem, RemTy);
-
- Rem->replaceAllUsesWith(Trunc);
- Rem->dropAllReferences();
- Rem->eraseFromParent();
-
- return expandRemainder(cast<BinaryOperator>(ExtRem));
-}
-
-/// Generate code to compute the remainder of two integers of bitwidth up to
-/// 64 bits. Uses the above routines and extends the inputs/truncates the
-/// outputs to operate in 64 bits.
-///
-/// Replace Rem with emulation code.
-bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
- assert((Rem->getOpcode() == Instruction::SRem ||
- Rem->getOpcode() == Instruction::URem) &&
- "Trying to expand remainder from a non-remainder function");
-
- Type *RemTy = Rem->getType();
- assert(!RemTy->isVectorTy() && "Div over vectors not supported");
-
- unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
-
- assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported");
-
- if (RemTyBitWidth == 64)
- return expandRemainder(Rem);
-
- // If bitwidth smaller than 64 extend inputs, extend output and proceed
- // with 64 bit division.
- IRBuilder<> Builder(Rem);
-
- Value *ExtDividend;
- Value *ExtDivisor;
- Value *ExtRem;
- Value *Trunc;
- Type *Int64Ty = Builder.getInt64Ty();
-
- if (Rem->getOpcode() == Instruction::SRem) {
- ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty);
- ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty);
- ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
- } else {
- ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty);
- ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty);
- ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
- }
- Trunc = Builder.CreateTrunc(ExtRem, RemTy);
-
- Rem->replaceAllUsesWith(Trunc);
- Rem->dropAllReferences();
- Rem->eraseFromParent();
-
- return expandRemainder(cast<BinaryOperator>(ExtRem));
-}
-
-/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
-/// above routines and extends the inputs/truncates the outputs to operate
-/// in 32 bits; that is, these routines are good for targets that have no
-/// or very little support for smaller than 32 bit integer arithmetic.
-///
-/// Replace Div with emulation code.
-bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
- assert((Div->getOpcode() == Instruction::SDiv ||
- Div->getOpcode() == Instruction::UDiv) &&
- "Trying to expand division from a non-division function");
-
- Type *DivTy = Div->getType();
- assert(!DivTy->isVectorTy() && "Div over vectors not supported");
-
- unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
-
- assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported");
-
- if (DivTyBitWidth == 32)
- return expandDivision(Div);
-
- // If bitwidth smaller than 32 extend inputs, extend output and proceed
- // with 32 bit division.
- IRBuilder<> Builder(Div);
-
- Value *ExtDividend;
- Value *ExtDivisor;
- Value *ExtDiv;
- Value *Trunc;
- Type *Int32Ty = Builder.getInt32Ty();
-
- if (Div->getOpcode() == Instruction::SDiv) {
- ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
- ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
- ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
- } else {
- ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
- ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
- }
- Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
-
- Div->replaceAllUsesWith(Trunc);
- Div->dropAllReferences();
- Div->eraseFromParent();
-
- return expandDivision(cast<BinaryOperator>(ExtDiv));
-}
-
-/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the
-/// above routines and extends the inputs/truncates the outputs to operate
-/// in 64 bits.
-///
-/// Replace Div with emulation code.
-bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
- assert((Div->getOpcode() == Instruction::SDiv ||
- Div->getOpcode() == Instruction::UDiv) &&
- "Trying to expand division from a non-division function");
-
- Type *DivTy = Div->getType();
- assert(!DivTy->isVectorTy() && "Div over vectors not supported");
-
- unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
-
- assert(DivTyBitWidth <= 64 &&
- "Div of bitwidth greater than 64 not supported");
-
- if (DivTyBitWidth == 64)
- return expandDivision(Div);
-
- // If bitwidth smaller than 64 extend inputs, extend output and proceed
- // with 64 bit division.
- IRBuilder<> Builder(Div);
-
- Value *ExtDividend;
- Value *ExtDivisor;
- Value *ExtDiv;
- Value *Trunc;
- Type *Int64Ty = Builder.getInt64Ty();
-
- if (Div->getOpcode() == Instruction::SDiv) {
- ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty);
- ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty);
- ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
- } else {
- ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
- ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
- ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
- }
- Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
-
- Div->replaceAllUsesWith(Trunc);
- Div->dropAllReferences();
- Div->eraseFromParent();
-
- return expandDivision(cast<BinaryOperator>(ExtDiv));
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
deleted file mode 100644
index 29e7c5260f46..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ /dev/null
@@ -1,497 +0,0 @@
-//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass transforms loops by placing phi nodes at the end of the loops for
-// all values that are live across the loop boundary. For example, it turns
-// the left into the right code:
-//
-// for (...) for (...)
-// if (c) if (c)
-// X1 = ... X1 = ...
-// else else
-// X2 = ... X2 = ...
-// X3 = phi(X1, X2) X3 = phi(X1, X2)
-// ... = X3 + 4 X4 = phi(X3)
-// ... = X4 + 4
-//
-// This is still valid LLVM; the extra phi nodes are purely redundant, and will
-// be trivially eliminated by InstCombine. The major benefit of this
-// transformation is that it makes many other loop optimizations, such as
-// LoopUnswitching, simpler.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LCSSA.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PredIteratorCache.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "lcssa"
-
-STATISTIC(NumLCSSA, "Number of live out of a loop variables");
-
-#ifdef EXPENSIVE_CHECKS
-static bool VerifyLoopLCSSA = true;
-#else
-static bool VerifyLoopLCSSA = false;
-#endif
-static cl::opt<bool, true>
- VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
- cl::Hidden,
- cl::desc("Verify loop lcssa form (time consuming)"));
-
-/// Return true if the specified block is in the list.
-static bool isExitBlock(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
- return is_contained(ExitBlocks, BB);
-}
-
-/// For every instruction from the worklist, check to see if it has any uses
-/// that are outside the current loop. If so, insert LCSSA PHI nodes and
-/// rewrite the uses.
-bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
- DominatorTree &DT, LoopInfo &LI) {
- SmallVector<Use *, 16> UsesToRewrite;
- SmallSetVector<PHINode *, 16> PHIsToRemove;
- PredIteratorCache PredCache;
- bool Changed = false;
-
- // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
- // instructions within the same loops, computing the exit blocks is
- // expensive, and we're not mutating the loop structure.
- SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
-
- while (!Worklist.empty()) {
- UsesToRewrite.clear();
-
- Instruction *I = Worklist.pop_back_val();
- assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
- BasicBlock *InstBB = I->getParent();
- Loop *L = LI.getLoopFor(InstBB);
- assert(L && "Instruction belongs to a BB that's not part of a loop");
- if (!LoopExitBlocks.count(L))
- L->getExitBlocks(LoopExitBlocks[L]);
- assert(LoopExitBlocks.count(L));
- const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
-
- if (ExitBlocks.empty())
- continue;
-
- for (Use &U : I->uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UserBB = User->getParent();
- if (auto *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(U);
-
- if (InstBB != UserBB && !L->contains(UserBB))
- UsesToRewrite.push_back(&U);
- }
-
- // If there are no uses outside the loop, exit with no change.
- if (UsesToRewrite.empty())
- continue;
-
- ++NumLCSSA; // We are applying the transformation
-
- // Invoke instructions are special in that their result value is not
- // available along their unwind edge. The code below tests to see whether
- // DomBB dominates the value, so adjust DomBB to the normal destination
- // block, which is effectively where the value is first usable.
- BasicBlock *DomBB = InstBB;
- if (auto *Inv = dyn_cast<InvokeInst>(I))
- DomBB = Inv->getNormalDest();
-
- DomTreeNode *DomNode = DT.getNode(DomBB);
-
- SmallVector<PHINode *, 16> AddedPHIs;
- SmallVector<PHINode *, 8> PostProcessPHIs;
-
- SmallVector<PHINode *, 4> InsertedPHIs;
- SSAUpdater SSAUpdate(&InsertedPHIs);
- SSAUpdate.Initialize(I->getType(), I->getName());
-
- // Insert the LCSSA phi's into all of the exit blocks dominated by the
- // value, and add them to the Phi's map.
- for (BasicBlock *ExitBB : ExitBlocks) {
- if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
- continue;
-
- // If we already inserted something for this BB, don't reprocess it.
- if (SSAUpdate.HasValueForBlock(ExitBB))
- continue;
-
- PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa", &ExitBB->front());
- // Get the debug location from the original instruction.
- PN->setDebugLoc(I->getDebugLoc());
- // Add inputs from inside the loop for this PHI.
- for (BasicBlock *Pred : PredCache.get(ExitBB)) {
- PN->addIncoming(I, Pred);
-
- // If the exit block has a predecessor not within the loop, arrange for
- // the incoming value use corresponding to that predecessor to be
- // rewritten in terms of a different LCSSA PHI.
- if (!L->contains(Pred))
- UsesToRewrite.push_back(
- &PN->getOperandUse(PN->getOperandNumForIncomingValue(
- PN->getNumIncomingValues() - 1)));
- }
-
- AddedPHIs.push_back(PN);
-
- // Remember that this phi makes the value alive in this block.
- SSAUpdate.AddAvailableValue(ExitBB, PN);
-
- // LoopSimplify might fail to simplify some loops (e.g. when indirect
- // branches are involved). In such situations, it might happen that an
- // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
- // create PHIs in such an exit block, we are also inserting PHIs into L2's
- // header. This could break LCSSA form for L2 because these inserted PHIs
- // can also have uses outside of L2. Remember all PHIs in such situation
- // as to revisit than later on. FIXME: Remove this if indirectbr support
- // into LoopSimplify gets improved.
- if (auto *OtherLoop = LI.getLoopFor(ExitBB))
- if (!L->contains(OtherLoop))
- PostProcessPHIs.push_back(PN);
- }
-
- // Rewrite all uses outside the loop in terms of the new PHIs we just
- // inserted.
- for (Use *UseToRewrite : UsesToRewrite) {
- // If this use is in an exit block, rewrite to use the newly inserted PHI.
- // This is required for correctness because SSAUpdate doesn't handle uses
- // in the same block. It assumes the PHI we inserted is at the end of the
- // block.
- Instruction *User = cast<Instruction>(UseToRewrite->getUser());
- BasicBlock *UserBB = User->getParent();
- if (auto *PN = dyn_cast<PHINode>(User))
- UserBB = PN->getIncomingBlock(*UseToRewrite);
-
- if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
- // Tell the VHs that the uses changed. This updates SCEV's caches.
- if (UseToRewrite->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
- UseToRewrite->set(&UserBB->front());
- continue;
- }
-
- // If we added a single PHI, it must dominate all uses and we can directly
- // rename it.
- if (AddedPHIs.size() == 1) {
- // Tell the VHs that the uses changed. This updates SCEV's caches.
- // We might call ValueIsRAUWd multiple times for the same value.
- if (UseToRewrite->get()->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
- UseToRewrite->set(AddedPHIs[0]);
- continue;
- }
-
- // Otherwise, do full PHI insertion.
- SSAUpdate.RewriteUse(*UseToRewrite);
- }
-
- SmallVector<DbgValueInst *, 4> DbgValues;
- llvm::findDbgValues(DbgValues, I);
-
- // Update pre-existing debug value uses that reside outside the loop.
- auto &Ctx = I->getContext();
- for (auto DVI : DbgValues) {
- BasicBlock *UserBB = DVI->getParent();
- if (InstBB == UserBB || L->contains(UserBB))
- continue;
- // We currently only handle debug values residing in blocks that were
- // traversed while rewriting the uses. If we inserted just a single PHI,
- // we will handle all relevant debug values.
- Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
- : SSAUpdate.FindValueForBlock(UserBB);
- if (V)
- DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
- }
-
- // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
- // to post-process them to keep LCSSA form.
- for (PHINode *InsertedPN : InsertedPHIs) {
- if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
- if (!L->contains(OtherLoop))
- PostProcessPHIs.push_back(InsertedPN);
- }
-
- // Post process PHI instructions that were inserted into another disjoint
- // loop and update their exits properly.
- for (auto *PostProcessPN : PostProcessPHIs)
- if (!PostProcessPN->use_empty())
- Worklist.push_back(PostProcessPN);
-
- // Keep track of PHI nodes that we want to remove because they did not have
- // any uses rewritten. If the new PHI is used, store it so that we can
- // try to propagate dbg.value intrinsics to it.
- SmallVector<PHINode *, 2> NeedDbgValues;
- for (PHINode *PN : AddedPHIs)
- if (PN->use_empty())
- PHIsToRemove.insert(PN);
- else
- NeedDbgValues.push_back(PN);
- insertDebugValuesForPHIs(InstBB, NeedDbgValues);
- Changed = true;
- }
- // Remove PHI nodes that did not have any uses rewritten. We need to redo the
- // use_empty() check here, because even if the PHI node wasn't used when added
- // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
- // not guaranteed to handle trees/cycles of PHI nodes that only are used by
- // each other. Such situations has only been noticed when the input IR
- // contains unreachable code, and leaving some extra redundant PHI nodes in
- // such situations is considered a minor problem.
- for (PHINode *PN : PHIsToRemove)
- if (PN->use_empty())
- PN->eraseFromParent();
- return Changed;
-}
-
-// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
-static void computeBlocksDominatingExits(
- Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
- SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
- SmallVector<BasicBlock *, 8> BBWorklist;
-
- // We start from the exit blocks, as every block trivially dominates itself
- // (not strictly).
- for (BasicBlock *BB : ExitBlocks)
- BBWorklist.push_back(BB);
-
- while (!BBWorklist.empty()) {
- BasicBlock *BB = BBWorklist.pop_back_val();
-
- // Check if this is a loop header. If this is the case, we're done.
- if (L.getHeader() == BB)
- continue;
-
- // Otherwise, add its immediate predecessor in the dominator tree to the
- // worklist, unless we visited it already.
- BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
-
- // Exit blocks can have an immediate dominator not beloinging to the
- // loop. For an exit block to be immediately dominated by another block
- // outside the loop, it implies not all paths from that dominator, to the
- // exit block, go through the loop.
- // Example:
- //
- // |---- A
- // | |
- // | B<--
- // | | |
- // |---> C --
- // |
- // D
- //
- // C is the exit block of the loop and it's immediately dominated by A,
- // which doesn't belong to the loop.
- if (!L.contains(IDomBB))
- continue;
-
- if (BlocksDominatingExits.insert(IDomBB))
- BBWorklist.push_back(IDomBB);
- }
-}
-
-bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE) {
- bool Changed = false;
-
-#ifdef EXPENSIVE_CHECKS
- // Verify all sub-loops are in LCSSA form already.
- for (Loop *SubLoop: L)
- assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
-#endif
-
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L.getExitBlocks(ExitBlocks);
- if (ExitBlocks.empty())
- return false;
-
- SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
-
- // We want to avoid use-scanning leveraging dominance informations.
- // If a block doesn't dominate any of the loop exits, the none of the values
- // defined in the loop can be used outside.
- // We compute the set of blocks fullfilling the conditions in advance
- // walking the dominator tree upwards until we hit a loop header.
- computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
-
- SmallVector<Instruction *, 8> Worklist;
-
- // Look at all the instructions in the loop, checking to see if they have uses
- // outside the loop. If so, put them into the worklist to rewrite those uses.
- for (BasicBlock *BB : BlocksDominatingExits) {
- // Skip blocks that are part of any sub-loops, they must be in LCSSA
- // already.
- if (LI->getLoopFor(BB) != &L)
- continue;
- for (Instruction &I : *BB) {
- // Reject two common cases fast: instructions with no uses (like stores)
- // and instructions with one use that is in the same block as this.
- if (I.use_empty() ||
- (I.hasOneUse() && I.user_back()->getParent() == BB &&
- !isa<PHINode>(I.user_back())))
- continue;
-
- // Tokens cannot be used in PHI nodes, so we skip over them.
- // We can run into tokens which are live out of a loop with catchswitch
- // instructions in Windows EH if the catchswitch has one catchpad which
- // is inside the loop and another which is not.
- if (I.getType()->isTokenTy())
- continue;
-
- Worklist.push_back(&I);
- }
- }
- Changed = formLCSSAForInstructions(Worklist, DT, *LI);
-
- // If we modified the code, remove any caches about the loop from SCEV to
- // avoid dangling entries.
- // FIXME: This is a big hammer, can we clear the cache more selectively?
- if (SE && Changed)
- SE->forgetLoop(&L);
-
- assert(L.isLCSSAForm(DT));
-
- return Changed;
-}
-
-/// Process a loop nest depth first.
-bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE) {
- bool Changed = false;
-
- // Recurse depth-first through inner loops.
- for (Loop *SubLoop : L.getSubLoops())
- Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
-
- Changed |= formLCSSA(L, DT, LI, SE);
- return Changed;
-}
-
-/// Process all loops in the function, inner-most out.
-static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT,
- ScalarEvolution *SE) {
- bool Changed = false;
- for (auto &L : *LI)
- Changed |= formLCSSARecursively(*L, DT, LI, SE);
- return Changed;
-}
-
-namespace {
-struct LCSSAWrapperPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- LCSSAWrapperPass() : FunctionPass(ID) {
- initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- // Cached analysis information for the current function.
- DominatorTree *DT;
- LoopInfo *LI;
- ScalarEvolution *SE;
-
- bool runOnFunction(Function &F) override;
- void verifyAnalysis() const override {
- // This check is very expensive. On the loop intensive compiles it may cause
- // up to 10x slowdown. Currently it's disabled by default. LPPassManager
- // always does limited form of the LCSSA verification. Similar reasoning
- // was used for the LoopInfo verifier.
- if (VerifyLoopLCSSA) {
- assert(all_of(*LI,
- [&](Loop *L) {
- return L->isRecursivelyLCSSAForm(*DT, *LI);
- }) &&
- "LCSSA form is broken!");
- }
- };
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG. It maintains both of these,
- /// as well as the CFG. It also requires dominator information.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
-
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addPreserved<BranchProbabilityInfoWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
-
- // This is needed to perform LCSSA verification inside LPPassManager
- AU.addRequired<LCSSAVerificationPass>();
- AU.addPreserved<LCSSAVerificationPass>();
- }
-};
-}
-
-char LCSSAWrapperPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
-INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
- false, false)
-
-Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
-char &llvm::LCSSAID = LCSSAWrapperPass::ID;
-
-/// Transform \p F into loop-closed SSA form.
-bool LCSSAWrapperPass::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- SE = SEWP ? &SEWP->getSE() : nullptr;
-
- return formLCSSAOnAllLoops(LI, *DT, SE);
-}
-
-PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &LI = AM.getResult<LoopAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
- if (!formLCSSAOnAllLoops(&LI, DT, SE))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
- PA.preserve<ScalarEvolutionAnalysis>();
- // BPI maps terminators to probabilities, since we don't modify the CFG, no
- // updates are needed to preserve it.
- PA.preserve<BranchProbabilityAnalysis>();
- PA.preserve<MemorySSAAnalysis>();
- return PA;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
deleted file mode 100644
index 8c67d1dc6eb3..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ /dev/null
@@ -1,561 +0,0 @@
-//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass shrink-wraps a call to function if the result is not used.
-// The call can set errno but is otherwise side effect free. For example:
-// sqrt(val);
-// is transformed to
-// if (val < 0)
-// sqrt(val);
-// Even if the result of library call is not being used, the compiler cannot
-// safely delete the call because the function can set errno on error
-// conditions.
-// Note in many functions, the error condition solely depends on the incoming
-// parameter. In this optimization, we can generate the condition can lead to
-// the errno to shrink-wrap the call. Since the chances of hitting the error
-// condition is low, the runtime call is effectively eliminated.
-//
-// These partially dead calls are usually results of C++ abstraction penalty
-// exposed by inlining.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "libcalls-shrinkwrap"
-
-STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
-STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
-
-namespace {
-class LibCallsShrinkWrapLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) {
- initializeLibCallsShrinkWrapLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-};
-}
-
-char LibCallsShrinkWrapLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
- "Conditionally eliminate dead library calls", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
- "Conditionally eliminate dead library calls", false, false)
-
-namespace {
-class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
-public:
- LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
- : TLI(TLI), DT(DT){};
- void visitCallInst(CallInst &CI) { checkCandidate(CI); }
- bool perform() {
- bool Changed = false;
- for (auto &CI : WorkList) {
- LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
- << "\n");
- if (perform(CI)) {
- Changed = true;
- LLVM_DEBUG(dbgs() << "Transformed\n");
- }
- }
- return Changed;
- }
-
-private:
- bool perform(CallInst *CI);
- void checkCandidate(CallInst &CI);
- void shrinkWrapCI(CallInst *CI, Value *Cond);
- bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func);
- bool performCallErrors(CallInst *CI, const LibFunc &Func);
- bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func);
- Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func);
- Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
- Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
-
- // Create an OR of two conditions.
- Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
- CmpInst::Predicate Cmp2, float Val2) {
- IRBuilder<> BBBuilder(CI);
- Value *Arg = CI->getArgOperand(0);
- auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2);
- auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
- return BBBuilder.CreateOr(Cond1, Cond2);
- }
-
- // Create a single condition using IRBuilder.
- Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp,
- float Val) {
- Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
- if (!Arg->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Arg->getType());
- return BBBuilder.CreateFCmp(Cmp, Arg, V);
- }
-
- // Create a single condition.
- Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) {
- IRBuilder<> BBBuilder(CI);
- Value *Arg = CI->getArgOperand(0);
- return createCond(BBBuilder, Arg, Cmp, Val);
- }
-
- const TargetLibraryInfo &TLI;
- DominatorTree *DT;
- SmallVector<CallInst *, 16> WorkList;
-};
-} // end anonymous namespace
-
-// Perform the transformation to calls with errno set by domain error.
-bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
- const LibFunc &Func) {
- Value *Cond = nullptr;
-
- switch (Func) {
- case LibFunc_acos: // DomainError: (x < -1 || x > 1)
- case LibFunc_acosf: // Same as acos
- case LibFunc_acosl: // Same as acos
- case LibFunc_asin: // DomainError: (x < -1 || x > 1)
- case LibFunc_asinf: // Same as asin
- case LibFunc_asinl: // Same as asin
- {
- ++NumWrappedTwoCond;
- Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
- break;
- }
- case LibFunc_cos: // DomainError: (x == +inf || x == -inf)
- case LibFunc_cosf: // Same as cos
- case LibFunc_cosl: // Same as cos
- case LibFunc_sin: // DomainError: (x == +inf || x == -inf)
- case LibFunc_sinf: // Same as sin
- case LibFunc_sinl: // Same as sin
- {
- ++NumWrappedTwoCond;
- Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
- -INFINITY);
- break;
- }
- case LibFunc_acosh: // DomainError: (x < 1)
- case LibFunc_acoshf: // Same as acosh
- case LibFunc_acoshl: // Same as acosh
- {
- ++NumWrappedOneCond;
- Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
- break;
- }
- case LibFunc_sqrt: // DomainError: (x < 0)
- case LibFunc_sqrtf: // Same as sqrt
- case LibFunc_sqrtl: // Same as sqrt
- {
- ++NumWrappedOneCond;
- Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
- break;
- }
- default:
- return false;
- }
- shrinkWrapCI(CI, Cond);
- return true;
-}
-
-// Perform the transformation to calls with errno set by range error.
-bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
- const LibFunc &Func) {
- Value *Cond = nullptr;
-
- switch (Func) {
- case LibFunc_cosh:
- case LibFunc_coshf:
- case LibFunc_coshl:
- case LibFunc_exp:
- case LibFunc_expf:
- case LibFunc_expl:
- case LibFunc_exp10:
- case LibFunc_exp10f:
- case LibFunc_exp10l:
- case LibFunc_exp2:
- case LibFunc_exp2f:
- case LibFunc_exp2l:
- case LibFunc_sinh:
- case LibFunc_sinhf:
- case LibFunc_sinhl: {
- Cond = generateTwoRangeCond(CI, Func);
- break;
- }
- case LibFunc_expm1: // RangeError: (709, inf)
- case LibFunc_expm1f: // RangeError: (88, inf)
- case LibFunc_expm1l: // RangeError: (11356, inf)
- {
- Cond = generateOneRangeCond(CI, Func);
- break;
- }
- default:
- return false;
- }
- shrinkWrapCI(CI, Cond);
- return true;
-}
-
-// Perform the transformation to calls with errno set by combination of errors.
-bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
- const LibFunc &Func) {
- Value *Cond = nullptr;
-
- switch (Func) {
- case LibFunc_atanh: // DomainError: (x < -1 || x > 1)
- // PoleError: (x == -1 || x == 1)
- // Overall Cond: (x <= -1 || x >= 1)
- case LibFunc_atanhf: // Same as atanh
- case LibFunc_atanhl: // Same as atanh
- {
- ++NumWrappedTwoCond;
- Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
- break;
- }
- case LibFunc_log: // DomainError: (x < 0)
- // PoleError: (x == 0)
- // Overall Cond: (x <= 0)
- case LibFunc_logf: // Same as log
- case LibFunc_logl: // Same as log
- case LibFunc_log10: // Same as log
- case LibFunc_log10f: // Same as log
- case LibFunc_log10l: // Same as log
- case LibFunc_log2: // Same as log
- case LibFunc_log2f: // Same as log
- case LibFunc_log2l: // Same as log
- case LibFunc_logb: // Same as log
- case LibFunc_logbf: // Same as log
- case LibFunc_logbl: // Same as log
- {
- ++NumWrappedOneCond;
- Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
- break;
- }
- case LibFunc_log1p: // DomainError: (x < -1)
- // PoleError: (x == -1)
- // Overall Cond: (x <= -1)
- case LibFunc_log1pf: // Same as log1p
- case LibFunc_log1pl: // Same as log1p
- {
- ++NumWrappedOneCond;
- Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
- break;
- }
- case LibFunc_pow: // DomainError: x < 0 and y is noninteger
- // PoleError: x == 0 and y < 0
- // RangeError: overflow or underflow
- case LibFunc_powf:
- case LibFunc_powl: {
- Cond = generateCondForPow(CI, Func);
- if (Cond == nullptr)
- return false;
- break;
- }
- default:
- return false;
- }
- assert(Cond && "performCallErrors should not see an empty condition");
- shrinkWrapCI(CI, Cond);
- return true;
-}
-
-// Checks if CI is a candidate for shrinkwrapping and put it into work list if
-// true.
-void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
- if (CI.isNoBuiltin())
- return;
- // A possible improvement is to handle the calls with the return value being
- // used. If there is API for fast libcall implementation without setting
- // errno, we can use the same framework to direct/wrap the call to the fast
- // API in the error free path, and leave the original call in the slow path.
- if (!CI.use_empty())
- return;
-
- LibFunc Func;
- Function *Callee = CI.getCalledFunction();
- if (!Callee)
- return;
- if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
- return;
-
- if (CI.getNumArgOperands() == 0)
- return;
- // TODO: Handle long double in other formats.
- Type *ArgType = CI.getArgOperand(0)->getType();
- if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() ||
- ArgType->isX86_FP80Ty()))
- return;
-
- WorkList.push_back(&CI);
-}
-
-// Generate the upper bound condition for RangeError.
-Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
- const LibFunc &Func) {
- float UpperBound;
- switch (Func) {
- case LibFunc_expm1: // RangeError: (709, inf)
- UpperBound = 709.0f;
- break;
- case LibFunc_expm1f: // RangeError: (88, inf)
- UpperBound = 88.0f;
- break;
- case LibFunc_expm1l: // RangeError: (11356, inf)
- UpperBound = 11356.0f;
- break;
- default:
- llvm_unreachable("Unhandled library call!");
- }
-
- ++NumWrappedOneCond;
- return createCond(CI, CmpInst::FCMP_OGT, UpperBound);
-}
-
-// Generate the lower and upper bound condition for RangeError.
-Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
- const LibFunc &Func) {
- float UpperBound, LowerBound;
- switch (Func) {
- case LibFunc_cosh: // RangeError: (x < -710 || x > 710)
- case LibFunc_sinh: // Same as cosh
- LowerBound = -710.0f;
- UpperBound = 710.0f;
- break;
- case LibFunc_coshf: // RangeError: (x < -89 || x > 89)
- case LibFunc_sinhf: // Same as coshf
- LowerBound = -89.0f;
- UpperBound = 89.0f;
- break;
- case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357)
- case LibFunc_sinhl: // Same as coshl
- LowerBound = -11357.0f;
- UpperBound = 11357.0f;
- break;
- case LibFunc_exp: // RangeError: (x < -745 || x > 709)
- LowerBound = -745.0f;
- UpperBound = 709.0f;
- break;
- case LibFunc_expf: // RangeError: (x < -103 || x > 88)
- LowerBound = -103.0f;
- UpperBound = 88.0f;
- break;
- case LibFunc_expl: // RangeError: (x < -11399 || x > 11356)
- LowerBound = -11399.0f;
- UpperBound = 11356.0f;
- break;
- case LibFunc_exp10: // RangeError: (x < -323 || x > 308)
- LowerBound = -323.0f;
- UpperBound = 308.0f;
- break;
- case LibFunc_exp10f: // RangeError: (x < -45 || x > 38)
- LowerBound = -45.0f;
- UpperBound = 38.0f;
- break;
- case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932)
- LowerBound = -4950.0f;
- UpperBound = 4932.0f;
- break;
- case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023)
- LowerBound = -1074.0f;
- UpperBound = 1023.0f;
- break;
- case LibFunc_exp2f: // RangeError: (x < -149 || x > 127)
- LowerBound = -149.0f;
- UpperBound = 127.0f;
- break;
- case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383)
- LowerBound = -16445.0f;
- UpperBound = 11383.0f;
- break;
- default:
- llvm_unreachable("Unhandled library call!");
- }
-
- ++NumWrappedTwoCond;
- return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT,
- LowerBound);
-}
-
-// For pow(x,y), We only handle the following cases:
-// (1) x is a constant && (x >= 1) && (x < MaxUInt8)
-// Cond is: (y > 127)
-// (2) x is a value coming from an integer type.
-// (2.1) if x's bit_size == 8
-// Cond: (x <= 0 || y > 128)
-// (2.2) if x's bit_size is 16
-// Cond: (x <= 0 || y > 64)
-// (2.3) if x's bit_size is 32
-// Cond: (x <= 0 || y > 32)
-// Support for powl(x,y) and powf(x,y) are TBD.
-//
-// Note that condition can be more conservative than the actual condition
-// (i.e. we might invoke the calls that will not set the errno.).
-//
-Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
- const LibFunc &Func) {
- // FIXME: LibFunc_powf and powl TBD.
- if (Func != LibFunc_pow) {
- LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n");
- return nullptr;
- }
-
- Value *Base = CI->getArgOperand(0);
- Value *Exp = CI->getArgOperand(1);
- IRBuilder<> BBBuilder(CI);
-
- // Constant Base case.
- if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
- double D = CF->getValueAPF().convertToDouble();
- if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) {
- LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
- return nullptr;
- }
-
- ++NumWrappedOneCond;
- Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f));
- if (!Exp->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Exp->getType());
- return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
- }
-
- // If the Base value coming from an integer type.
- Instruction *I = dyn_cast<Instruction>(Base);
- if (!I) {
- LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n");
- return nullptr;
- }
- unsigned Opcode = I->getOpcode();
- if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) {
- unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
- float UpperV = 0.0f;
- if (BW == 8)
- UpperV = 128.0f;
- else if (BW == 16)
- UpperV = 64.0f;
- else if (BW == 32)
- UpperV = 32.0f;
- else {
- LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n");
- return nullptr;
- }
-
- ++NumWrappedTwoCond;
- Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV));
- Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f));
- if (!Exp->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Exp->getType());
- if (!Base->getType()->isFloatTy())
- V0 = ConstantExpr::getFPExtend(V0, Exp->getType());
-
- Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
- Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
- return BBBuilder.CreateOr(Cond0, Cond);
- }
- LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
- return nullptr;
-}
-
-// Wrap conditions that can potentially generate errno to the library call.
-void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
- assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst");
- MDNode *BranchWeights =
- MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
-
- Instruction *NewInst =
- SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
- BasicBlock *CallBB = NewInst->getParent();
- CallBB->setName("cdce.call");
- BasicBlock *SuccBB = CallBB->getSingleSuccessor();
- assert(SuccBB && "The split block should have a single successor");
- SuccBB->setName("cdce.end");
- CI->removeFromParent();
- CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
- LLVM_DEBUG(dbgs() << "== Basic Block After ==");
- LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
- << *CallBB->getSingleSuccessor() << "\n");
-}
-
-// Perform the transformation to a single candidate.
-bool LibCallsShrinkWrap::perform(CallInst *CI) {
- LibFunc Func;
- Function *Callee = CI->getCalledFunction();
- assert(Callee && "perform() should apply to a non-empty callee");
- TLI.getLibFunc(*Callee, Func);
- assert(Func && "perform() is not expecting an empty function");
-
- if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func))
- return true;
- return performCallErrors(CI, Func);
-}
-
-void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
- DominatorTree *DT) {
- if (F.hasFnAttribute(Attribute::OptimizeForSize))
- return false;
- LibCallsShrinkWrap CCDCE(TLI, DT);
- CCDCE.visit(F);
- bool Changed = CCDCE.perform();
-
-// Verify the dominator after we've updated it locally.
- assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast));
- return Changed;
-}
-
-bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- return runImpl(F, TLI, DT);
-}
-
-namespace llvm {
-char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID;
-
-// Public interface to LibCallsShrinkWrap pass.
-FunctionPass *createLibCallsShrinkWrapPass() {
- return new LibCallsShrinkWrapLegacyPass();
-}
-
-PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
- FunctionAnalysisManager &FAM) {
- auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, TLI, DT))
- return PreservedAnalyses::all();
- auto PA = PreservedAnalyses();
- PA.preserve<GlobalsAA>();
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
-}
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
deleted file mode 100644
index 39b6b889f91c..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ /dev/null
@@ -1,2956 +0,0 @@
-//===- Local.cpp - Functions to perform local transformations -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of functions perform various local transformations to the
-// program.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <climits>
-#include <cstdint>
-#include <iterator>
-#include <map>
-#include <utility>
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-#define DEBUG_TYPE "local"
-
-STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
-
-// Max recursion depth for collectBitParts used when detecting bswap and
-// bitreverse idioms
-static const unsigned BitPartRecursionMaxDepth = 64;
-
-//===----------------------------------------------------------------------===//
-// Local constant propagation.
-//
-
-/// ConstantFoldTerminator - If a terminator instruction is predicated on a
-/// constant value, convert it into an unconditional branch to the constant
-/// destination. This is a nontrivial operation because the successors of this
-/// basic block must have their PHI nodes updated.
-/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
-/// conditions and indirectbr addresses this might make dead if
-/// DeleteDeadConditions is true.
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
- const TargetLibraryInfo *TLI,
- DomTreeUpdater *DTU) {
- Instruction *T = BB->getTerminator();
- IRBuilder<> Builder(T);
-
- // Branch - See if we are conditional jumping on constant
- if (auto *BI = dyn_cast<BranchInst>(T)) {
- if (BI->isUnconditional()) return false; // Can't optimize uncond branch
- BasicBlock *Dest1 = BI->getSuccessor(0);
- BasicBlock *Dest2 = BI->getSuccessor(1);
-
- if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
- // Are we branching on constant?
- // YES. Change to unconditional branch...
- BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
- BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
-
- // Let the basic block know that we are letting go of it. Based on this,
- // it will adjust it's PHI nodes.
- OldDest->removePredecessor(BB);
-
- // Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Destination);
- BI->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
- return true;
- }
-
- if (Dest2 == Dest1) { // Conditional branch to same location?
- // This branch matches something like this:
- // br bool %cond, label %Dest, label %Dest
- // and changes it into: br label %Dest
-
- // Let the basic block know that we are letting go of one copy of it.
- assert(BI->getParent() && "Terminator not inserted in block!");
- Dest1->removePredecessor(BI->getParent());
-
- // Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Dest1);
- Value *Cond = BI->getCondition();
- BI->eraseFromParent();
- if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- return true;
- }
- return false;
- }
-
- if (auto *SI = dyn_cast<SwitchInst>(T)) {
- // If we are switching on a constant, we can convert the switch to an
- // unconditional branch.
- auto *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *DefaultDest = SI->getDefaultDest();
- BasicBlock *TheOnlyDest = DefaultDest;
-
- // If the default is unreachable, ignore it when searching for TheOnlyDest.
- if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
- SI->getNumCases() > 0) {
- TheOnlyDest = SI->case_begin()->getCaseSuccessor();
- }
-
- // Figure out which case it goes to.
- for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
- // Found case matching a constant operand?
- if (i->getCaseValue() == CI) {
- TheOnlyDest = i->getCaseSuccessor();
- break;
- }
-
- // Check to see if this branch is going to the same place as the default
- // dest. If so, eliminate it as an explicit compare.
- if (i->getCaseSuccessor() == DefaultDest) {
- MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
- unsigned NCases = SI->getNumCases();
- // Fold the case metadata into the default if there will be any branches
- // left, unless the metadata doesn't match the switch.
- if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) {
- // Collect branch weights into a vector.
- SmallVector<uint32_t, 8> Weights;
- for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
- ++MD_i) {
- auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
- Weights.push_back(CI->getValue().getZExtValue());
- }
- // Merge weight of this case to the default weight.
- unsigned idx = i->getCaseIndex();
- Weights[0] += Weights[idx+1];
- // Remove weight for this case.
- std::swap(Weights[idx+1], Weights.back());
- Weights.pop_back();
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(Weights));
- }
- // Remove this entry.
- BasicBlock *ParentBB = SI->getParent();
- DefaultDest->removePredecessor(ParentBB);
- i = SI->removeCase(i);
- e = SI->case_end();
- if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, ParentBB, DefaultDest}});
- continue;
- }
-
- // Otherwise, check to see if the switch only branches to one destination.
- // We do this by reseting "TheOnlyDest" to null when we find two non-equal
- // destinations.
- if (i->getCaseSuccessor() != TheOnlyDest)
- TheOnlyDest = nullptr;
-
- // Increment this iterator as we haven't removed the case.
- ++i;
- }
-
- if (CI && !TheOnlyDest) {
- // Branching on a constant, but not any of the cases, go to the default
- // successor.
- TheOnlyDest = SI->getDefaultDest();
- }
-
- // If we found a single destination that we can fold the switch into, do so
- // now.
- if (TheOnlyDest) {
- // Insert the new branch.
- Builder.CreateBr(TheOnlyDest);
- BasicBlock *BB = SI->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(SI->getNumSuccessors() - 1);
-
- // Remove entries from PHI nodes which we no longer branch to...
- for (BasicBlock *Succ : successors(SI)) {
- // Found case matching a constant operand?
- if (Succ == TheOnlyDest) {
- TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
- } else {
- Succ->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- }
- }
-
- // Delete the old switch.
- Value *Cond = SI->getCondition();
- SI->eraseFromParent();
- if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
- return true;
- }
-
- if (SI->getNumCases() == 1) {
- // Otherwise, we can fold this switch into a conditional branch
- // instruction if it has only one non-default destination.
- auto FirstCase = *SI->case_begin();
- Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- FirstCase.getCaseValue(), "cond");
-
- // Insert the new branch.
- BranchInst *NewBr = Builder.CreateCondBr(Cond,
- FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
- MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
- if (MD && MD->getNumOperands() == 3) {
- ConstantInt *SICase =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
- ConstantInt *SIDef =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
- assert(SICase && SIDef);
- // The TrueWeight should be the weight for the single case of SI.
- NewBr->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(SICase->getValue().getZExtValue(),
- SIDef->getValue().getZExtValue()));
- }
-
- // Update make.implicit metadata to the newly-created conditional branch.
- MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
- if (MakeImplicitMD)
- NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
-
- // Delete the old switch.
- SI->eraseFromParent();
- return true;
- }
- return false;
- }
-
- if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {
- // indirectbr blockaddress(@F, @BB) -> br label @BB
- if (auto *BA =
- dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
- BasicBlock *TheOnlyDest = BA->getBasicBlock();
- std::vector <DominatorTree::UpdateType> Updates;
- if (DTU)
- Updates.reserve(IBI->getNumDestinations() - 1);
-
- // Insert the new branch.
- Builder.CreateBr(TheOnlyDest);
-
- for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- if (IBI->getDestination(i) == TheOnlyDest) {
- TheOnlyDest = nullptr;
- } else {
- BasicBlock *ParentBB = IBI->getParent();
- BasicBlock *DestBB = IBI->getDestination(i);
- DestBB->removePredecessor(ParentBB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
- }
- }
- Value *Address = IBI->getAddress();
- IBI->eraseFromParent();
- if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
-
- // If we didn't find our destination in the IBI successor list, then we
- // have undefined behavior. Replace the unconditional branch with an
- // 'unreachable' instruction.
- if (TheOnlyDest) {
- BB->getTerminator()->eraseFromParent();
- new UnreachableInst(BB->getContext(), BB);
- }
-
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
- return true;
- }
- }
-
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// Local dead code elimination.
-//
-
-/// isInstructionTriviallyDead - Return true if the result produced by the
-/// instruction is not used, and the instruction has no side effects.
-///
-bool llvm::isInstructionTriviallyDead(Instruction *I,
- const TargetLibraryInfo *TLI) {
- if (!I->use_empty())
- return false;
- return wouldInstructionBeTriviallyDead(I, TLI);
-}
-
-bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
- const TargetLibraryInfo *TLI) {
- if (I->isTerminator())
- return false;
-
- // We don't want the landingpad-like instructions removed by anything this
- // general.
- if (I->isEHPad())
- return false;
-
- // We don't want debug info removed by anything this general, unless
- // debug info is empty.
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
- if (DDI->getAddress())
- return false;
- return true;
- }
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
- if (DVI->getValue())
- return false;
- return true;
- }
- if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
- if (DLI->getLabel())
- return false;
- return true;
- }
-
- if (!I->mayHaveSideEffects())
- return true;
-
- // Special case intrinsics that "may have side effects" but can be deleted
- // when dead.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- // Safe to delete llvm.stacksave and launder.invariant.group if dead.
- if (II->getIntrinsicID() == Intrinsic::stacksave ||
- II->getIntrinsicID() == Intrinsic::launder_invariant_group)
- return true;
-
- // Lifetime intrinsics are dead when their right-hand is undef.
- if (II->isLifetimeStartOrEnd())
- return isa<UndefValue>(II->getArgOperand(1));
-
- // Assumptions are dead if their condition is trivially true. Guards on
- // true are operationally no-ops. In the future we can consider more
- // sophisticated tradeoffs for guards considering potential for check
- // widening, but for now we keep things simple.
- if (II->getIntrinsicID() == Intrinsic::assume ||
- II->getIntrinsicID() == Intrinsic::experimental_guard) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- return !Cond->isZero();
-
- return false;
- }
- }
-
- if (isAllocLikeFn(I, TLI))
- return true;
-
- if (CallInst *CI = isFreeCall(I, TLI))
- if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
- return C->isNullValue() || isa<UndefValue>(C);
-
- if (auto *Call = dyn_cast<CallBase>(I))
- if (isMathLibCallNoop(Call, TLI))
- return true;
-
- return false;
-}
-
-/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
-/// trivially dead instruction, delete it. If that makes any of its operands
-/// trivially dead, delete them too, recursively. Return true if any
-/// instructions were deleted.
-bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
- Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !isInstructionTriviallyDead(I, TLI))
- return false;
-
- SmallVector<Instruction*, 16> DeadInsts;
- DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
-
- return true;
-}
-
-void llvm::RecursivelyDeleteTriviallyDeadInstructions(
- SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
- MemorySSAUpdater *MSSAU) {
- // Process the dead instruction list until empty.
- while (!DeadInsts.empty()) {
- Instruction &I = *DeadInsts.pop_back_val();
- assert(I.use_empty() && "Instructions with uses are not dead.");
- assert(isInstructionTriviallyDead(&I, TLI) &&
- "Live instruction found in dead worklist!");
-
- // Don't lose the debug info while deleting the instructions.
- salvageDebugInfo(I);
-
- // Null out all of the instruction's operands to see if any operand becomes
- // dead as we go.
- for (Use &OpU : I.operands()) {
- Value *OpV = OpU.get();
- OpU.set(nullptr);
-
- if (!OpV->use_empty())
- continue;
-
- // If the operand is an instruction that became dead as we nulled out the
- // operand, and if it is 'trivially' dead, delete it in a future loop
- // iteration.
- if (Instruction *OpI = dyn_cast<Instruction>(OpV))
- if (isInstructionTriviallyDead(OpI, TLI))
- DeadInsts.push_back(OpI);
- }
- if (MSSAU)
- MSSAU->removeMemoryAccess(&I);
-
- I.eraseFromParent();
- }
-}
-
-bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
- SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, I);
- for (auto *DII : DbgUsers) {
- Value *Undef = UndefValue::get(I->getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
- }
- return !DbgUsers.empty();
-}
-
-/// areAllUsesEqual - Check whether the uses of a value are all the same.
-/// This is similar to Instruction::hasOneUse() except this will also return
-/// true when there are no uses or multiple uses that all refer to the same
-/// value.
-static bool areAllUsesEqual(Instruction *I) {
- Value::user_iterator UI = I->user_begin();
- Value::user_iterator UE = I->user_end();
- if (UI == UE)
- return true;
-
- User *TheUse = *UI;
- for (++UI; UI != UE; ++UI) {
- if (*UI != TheUse)
- return false;
- }
- return true;
-}
-
-/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
-/// dead PHI node, due to being a def-use chain of single-use nodes that
-/// either forms a cycle or is terminated by a trivially dead instruction,
-/// delete it. If that makes any of its operands trivially dead, delete them
-/// too, recursively. Return true if a change was made.
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
- const TargetLibraryInfo *TLI) {
- SmallPtrSet<Instruction*, 4> Visited;
- for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
- I = cast<Instruction>(*I->user_begin())) {
- if (I->use_empty())
- return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
-
- // If we find an instruction more than once, we're on a cycle that
- // won't prove fruitful.
- if (!Visited.insert(I).second) {
- // Break the cycle and delete the instruction and its operands.
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- return true;
- }
- }
- return false;
-}
-
-static bool
-simplifyAndDCEInstruction(Instruction *I,
- SmallSetVector<Instruction *, 16> &WorkList,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (isInstructionTriviallyDead(I, TLI)) {
- salvageDebugInfo(*I);
-
- // Null out all of the instruction's operands to see if any operand becomes
- // dead as we go.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *OpV = I->getOperand(i);
- I->setOperand(i, nullptr);
-
- if (!OpV->use_empty() || I == OpV)
- continue;
-
- // If the operand is an instruction that became dead as we nulled out the
- // operand, and if it is 'trivially' dead, delete it in a future loop
- // iteration.
- if (Instruction *OpI = dyn_cast<Instruction>(OpV))
- if (isInstructionTriviallyDead(OpI, TLI))
- WorkList.insert(OpI);
- }
-
- I->eraseFromParent();
-
- return true;
- }
-
- if (Value *SimpleV = SimplifyInstruction(I, DL)) {
- // Add the users to the worklist. CAREFUL: an instruction can use itself,
- // in the case of a phi node.
- for (User *U : I->users()) {
- if (U != I) {
- WorkList.insert(cast<Instruction>(U));
- }
- }
-
- // Replace the instruction with its simplified value.
- bool Changed = false;
- if (!I->use_empty()) {
- I->replaceAllUsesWith(SimpleV);
- Changed = true;
- }
- if (isInstructionTriviallyDead(I, TLI)) {
- I->eraseFromParent();
- Changed = true;
- }
- return Changed;
- }
- return false;
-}
-
-/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
-/// simplify any instructions in it and recursively delete dead instructions.
-///
-/// This returns true if it changed the code, note that it can delete
-/// instructions in other blocks as well in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
- const TargetLibraryInfo *TLI) {
- bool MadeChange = false;
- const DataLayout &DL = BB->getModule()->getDataLayout();
-
-#ifndef NDEBUG
- // In debug builds, ensure that the terminator of the block is never replaced
- // or deleted by these simplifications. The idea of simplification is that it
- // cannot introduce new instructions, and there is no way to replace the
- // terminator of a block without introducing a new instruction.
- AssertingVH<Instruction> TerminatorVH(&BB->back());
-#endif
-
- SmallSetVector<Instruction *, 16> WorkList;
- // Iterate over the original function, only adding insts to the worklist
- // if they actually need to be revisited. This avoids having to pre-init
- // the worklist with the entire function's worth of instructions.
- for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
- BI != E;) {
- assert(!BI->isTerminator());
- Instruction *I = &*BI;
- ++BI;
-
- // We're visiting this instruction now, so make sure it's not in the
- // worklist from an earlier visit.
- if (!WorkList.count(I))
- MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
- }
-
- while (!WorkList.empty()) {
- Instruction *I = WorkList.pop_back_val();
- MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
- }
- return MadeChange;
-}
-
-//===----------------------------------------------------------------------===//
-// Control Flow Graph Restructuring.
-//
-
-/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
-/// method is called when we're about to delete Pred as a predecessor of BB. If
-/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values. For example, if we have:
-/// x = phi(1, 0, 0, 0)
-/// y = and x, z
-///
-/// .. and delete the predecessor corresponding to the '1', this will attempt to
-/// recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DomTreeUpdater *DTU) {
- // This only adjusts blocks with PHI nodes.
- if (!isa<PHINode>(BB->begin()))
- return;
-
- // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
- // them down. This will leave us with single entry phi nodes and other phis
- // that can be removed.
- BB->removePredecessor(Pred, true);
-
- WeakTrackingVH PhiIt = &BB->front();
- while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
- PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
- Value *OldPhiIt = PhiIt;
-
- if (!recursivelySimplifyInstruction(PN))
- continue;
-
- // If recursive simplification ended up deleting the next PHI node we would
- // iterate to, then our iterator is invalid, restart scanning from the top
- // of the block.
- if (PhiIt != OldPhiIt) PhiIt = &BB->front();
- }
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
-}
-
-/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
-/// between them, moving the instructions in the predecessor into DestBB and
-/// deleting the predecessor block.
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
- DomTreeUpdater *DTU) {
-
- // If BB has single-entry PHI nodes, fold them.
- while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
- Value *NewVal = PN->getIncomingValue(0);
- // Replace self referencing PHI with undef, it must be dead.
- if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
- PN->replaceAllUsesWith(NewVal);
- PN->eraseFromParent();
- }
-
- BasicBlock *PredBB = DestBB->getSinglePredecessor();
- assert(PredBB && "Block doesn't have a single predecessor!");
-
- bool ReplaceEntryBB = false;
- if (PredBB == &DestBB->getParent()->getEntryBlock())
- ReplaceEntryBB = true;
-
- // DTU updates: Collect all the edges that enter
- // PredBB. These dominator edges will be redirected to DestBB.
- SmallVector<DominatorTree::UpdateType, 32> Updates;
-
- if (DTU) {
- Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
- for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, PredBB});
- // This predecessor of PredBB may already have DestBB as a successor.
- if (llvm::find(successors(*I), DestBB) == succ_end(*I))
- Updates.push_back({DominatorTree::Insert, *I, DestBB});
- }
- }
-
- // Zap anything that took the address of DestBB. Not doing this will give the
- // address an invalid value.
- if (DestBB->hasAddressTaken()) {
- BlockAddress *BA = BlockAddress::get(DestBB);
- Constant *Replacement =
- ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1);
- BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
- BA->getType()));
- BA->destroyConstant();
- }
-
- // Anything that branched to PredBB now branches to DestBB.
- PredBB->replaceAllUsesWith(DestBB);
-
- // Splice all the instructions from PredBB to DestBB.
- PredBB->getTerminator()->eraseFromParent();
- DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
- new UnreachableInst(PredBB->getContext(), PredBB);
-
- // If the PredBB is the entry block of the function, move DestBB up to
- // become the entry block after we erase PredBB.
- if (ReplaceEntryBB)
- DestBB->moveAfter(PredBB);
-
- if (DTU) {
- assert(PredBB->getInstList().size() == 1 &&
- isa<UnreachableInst>(PredBB->getTerminator()) &&
- "The successor list of PredBB isn't empty before "
- "applying corresponding DTU updates.");
- DTU->applyUpdatesPermissive(Updates);
- DTU->deleteBB(PredBB);
- // Recalculation of DomTree is needed when updating a forward DomTree and
- // the Entry BB is replaced.
- if (ReplaceEntryBB && DTU->hasDomTree()) {
- // The entry block was removed and there is no external interface for
- // the dominator tree to be notified of this change. In this corner-case
- // we recalculate the entire tree.
- DTU->recalculate(*(DestBB->getParent()));
- }
- }
-
- else {
- PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr.
- }
-}
-
-/// CanMergeValues - Return true if we can choose one of these values to use
-/// in place of the other. Note that we will always choose the non-undef
-/// value to keep.
-static bool CanMergeValues(Value *First, Value *Second) {
- return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
-}
-
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
-///
-/// Assumption: Succ is the single successor for BB.
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
- assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
-
- LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
- << Succ->getName() << "\n");
- // Shortcut, if there is only a single predecessor it must be BB and merging
- // is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
- // Look at all the phi nodes in Succ, to see if they present a conflict when
- // merging these blocks
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // If the incoming value from BB is again a PHINode in
- // BB which has the same incoming value for *PI as PN does, we can
- // merge the phi nodes and then the blocks can still be merged
- PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
- if (BBPN && BBPN->getParent() == BB) {
- for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
- BasicBlock *IBB = PN->getIncomingBlock(PI);
- if (BBPreds.count(IBB) &&
- !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
- PN->getIncomingValue(PI))) {
- LLVM_DEBUG(dbgs()
- << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
- << BBPN->getName() << " with regard to common predecessor "
- << IBB->getName() << "\n");
- return false;
- }
- }
- } else {
- Value* Val = PN->getIncomingValueForBlock(BB);
- for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
- // See if the incoming value for the common predecessor is equal to the
- // one for BB, in which case this phi node will not prevent the merging
- // of the block.
- BasicBlock *IBB = PN->getIncomingBlock(PI);
- if (BBPreds.count(IBB) &&
- !CanMergeValues(Val, PN->getIncomingValue(PI))) {
- LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName()
- << " in " << Succ->getName()
- << " is conflicting with regard to common "
- << "predecessor " << IBB->getName() << "\n");
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-using PredBlockVector = SmallVector<BasicBlock *, 16>;
-using IncomingValueMap = DenseMap<BasicBlock *, Value *>;
-
-/// Determines the value to use as the phi node input for a block.
-///
-/// Select between \p OldVal any value that we know flows from \p BB
-/// to a particular phi on the basis of which one (if either) is not
-/// undef. Update IncomingValues based on the selected value.
-///
-/// \param OldVal The value we are considering selecting.
-/// \param BB The block that the value flows in from.
-/// \param IncomingValues A map from block-to-value for other phi inputs
-/// that we have examined.
-///
-/// \returns the selected value.
-static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,
- IncomingValueMap &IncomingValues) {
- if (!isa<UndefValue>(OldVal)) {
- assert((!IncomingValues.count(BB) ||
- IncomingValues.find(BB)->second == OldVal) &&
- "Expected OldVal to match incoming value from BB!");
-
- IncomingValues.insert(std::make_pair(BB, OldVal));
- return OldVal;
- }
-
- IncomingValueMap::const_iterator It = IncomingValues.find(BB);
- if (It != IncomingValues.end()) return It->second;
-
- return OldVal;
-}
-
-/// Create a map from block to value for the operands of a
-/// given phi.
-///
-/// Create a map from block to value for each non-undef value flowing
-/// into \p PN.
-///
-/// \param PN The phi we are collecting the map for.
-/// \param IncomingValues [out] The map from block to value for this phi.
-static void gatherIncomingValuesToPhi(PHINode *PN,
- IncomingValueMap &IncomingValues) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BB = PN->getIncomingBlock(i);
- Value *V = PN->getIncomingValue(i);
-
- if (!isa<UndefValue>(V))
- IncomingValues.insert(std::make_pair(BB, V));
- }
-}
-
-/// Replace the incoming undef values to a phi with the values
-/// from a block-to-value map.
-///
-/// \param PN The phi we are replacing the undefs in.
-/// \param IncomingValues A map from block to value.
-static void replaceUndefValuesInPhi(PHINode *PN,
- const IncomingValueMap &IncomingValues) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
-
- if (!isa<UndefValue>(V)) continue;
-
- BasicBlock *BB = PN->getIncomingBlock(i);
- IncomingValueMap::const_iterator It = IncomingValues.find(BB);
- if (It == IncomingValues.end()) continue;
-
- PN->setIncomingValue(i, It->second);
- }
-}
-
-/// Replace a value flowing from a block to a phi with
-/// potentially multiple instances of that value flowing from the
-/// block's predecessors to the phi.
-///
-/// \param BB The block with the value flowing into the phi.
-/// \param BBPreds The predecessors of BB.
-/// \param PN The phi that we are updating.
-static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
- const PredBlockVector &BBPreds,
- PHINode *PN) {
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- IncomingValueMap IncomingValues;
-
- // We are merging two blocks - BB, and the block containing PN - and
- // as a result we need to redirect edges from the predecessors of BB
- // to go to the block containing PN, and update PN
- // accordingly. Since we allow merging blocks in the case where the
- // predecessor and successor blocks both share some predecessors,
- // and where some of those common predecessors might have undef
- // values flowing into PN, we want to rewrite those values to be
- // consistent with the non-undef values.
-
- gatherIncomingValuesToPhi(PN, IncomingValues);
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) {
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- BasicBlock *PredBB = OldValPN->getIncomingBlock(i);
- Value *PredVal = OldValPN->getIncomingValue(i);
- Value *Selected = selectIncomingValueForBlock(PredVal, PredBB,
- IncomingValues);
-
- // And add a new incoming value for this predecessor for the
- // newly retargeted branch.
- PN->addIncoming(Selected, PredBB);
- }
- } else {
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) {
- // Update existing incoming values in PN for this
- // predecessor of BB.
- BasicBlock *PredBB = BBPreds[i];
- Value *Selected = selectIncomingValueForBlock(OldVal, PredBB,
- IncomingValues);
-
- // And add a new incoming value for this predecessor for the
- // newly retargeted branch.
- PN->addIncoming(Selected, PredBB);
- }
- }
-
- replaceUndefValuesInPhi(PN, IncomingValues);
-}
-
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
-/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential side-effect free intrinsics and the branch. If possible,
-/// eliminate BB by rewriting all the predecessors to branch to the successor
-/// block and return true. If we can't transform, return false.
-bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DomTreeUpdater *DTU) {
- assert(BB != &BB->getParent()->getEntryBlock() &&
- "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
-
- // We can't eliminate infinite loops.
- BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
- if (BB == Succ) return false;
-
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
- // Check for cases where Succ has multiple predecessors and a PHI node in BB
- // has uses which will not disappear when the PHI nodes are merged. It is
- // possible to handle such cases, but difficult: it requires checking whether
- // BB dominates Succ, which is non-trivial to calculate in the case where
- // Succ has multiple predecessors. Also, it requires checking whether
- // constructing the necessary self-referential PHI node doesn't introduce any
- // conflicts; this isn't too difficult, but the previous code for doing this
- // was incorrect.
- //
- // Note that if this check finds a live use, BB dominates Succ, so BB is
- // something like a loop pre-header (or rarely, a part of an irreducible CFG);
- // folding the branch isn't profitable in that case anyway.
- if (!Succ->getSinglePredecessor()) {
- BasicBlock::iterator BBI = BB->begin();
- while (isa<PHINode>(*BBI)) {
- for (Use &U : BBI->uses()) {
- if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) {
- if (PN->getIncomingBlock(U) != BB)
- return false;
- } else {
- return false;
- }
- }
- ++BBI;
- }
- }
-
- // We cannot fold the block if it's a branch to an already present callbr
- // successor because that creates duplicate successors.
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) {
- if (Succ == CBI->getDefaultDest())
- return false;
- for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
- if (Succ == CBI->getIndirectDest(i))
- return false;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
-
- SmallVector<DominatorTree::UpdateType, 32> Updates;
- if (DTU) {
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- // All predecessors of BB will be moved to Succ.
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- Updates.push_back({DominatorTree::Delete, *I, BB});
- // This predecessor of BB may already have Succ as a successor.
- if (llvm::find(successors(*I), Succ) == succ_end(*I))
- Updates.push_back({DominatorTree::Insert, *I, Succ});
- }
- }
-
- if (isa<PHINode>(Succ->begin())) {
- // If there is more than one pred of succ, and there are PHI nodes in
- // the successor, then we need to add incoming edges for the PHI nodes
- //
- const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
-
- // Loop over all of the PHI nodes in the successor of BB.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN);
- }
- }
-
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
-
- // Copy over any phi, debug or lifetime instruction.
- BB->getTerminator()->eraseFromParent();
- Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(),
- BB->getInstList());
- } else {
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
- assert(PN->use_empty() && "There shouldn't be any uses here!");
- PN->eraseFromParent();
- }
- }
-
- // If the unconditional branch we replaced contains llvm.loop metadata, we
- // add the metadata to the branch instructions in the predecessors.
- unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop");
- Instruction *TI = BB->getTerminator();
- if (TI)
- if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
- }
-
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
-
- // Clear the successor list of BB to match updates applying to DTU later.
- if (BB->getTerminator())
- BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
- assert(succ_empty(BB) && "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
-
- if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
- DTU->deleteBB(BB);
- } else {
- BB->eraseFromParent(); // Delete the old basic block.
- }
- return true;
-}
-
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
- // This implementation doesn't currently consider undef operands
- // specially. Theoretically, two phis which are identical except for
- // one having an undef where the other doesn't could be collapsed.
-
- struct PHIDenseMapInfo {
- static PHINode *getEmptyKey() {
- return DenseMapInfo<PHINode *>::getEmptyKey();
- }
-
- static PHINode *getTombstoneKey() {
- return DenseMapInfo<PHINode *>::getTombstoneKey();
- }
-
- static unsigned getHashValue(PHINode *PN) {
- // Compute a hash value on the operands. Instcombine will likely have
- // sorted them, which helps expose duplicates, but we have to check all
- // the operands to be safe in case instcombine hasn't run.
- return static_cast<unsigned>(hash_combine(
- hash_combine_range(PN->value_op_begin(), PN->value_op_end()),
- hash_combine_range(PN->block_begin(), PN->block_end())));
- }
-
- static bool isEqual(PHINode *LHS, PHINode *RHS) {
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
- return LHS == RHS;
- return LHS->isIdenticalTo(RHS);
- }
- };
-
- // Set of unique PHINodes.
- DenseSet<PHINode *, PHIDenseMapInfo> PHISet;
-
- // Examine each PHI.
- bool Changed = false;
- for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) {
- auto Inserted = PHISet.insert(PN);
- if (!Inserted.second) {
- // A duplicate. Replace this PHI with its duplicate.
- PN->replaceAllUsesWith(*Inserted.first);
- PN->eraseFromParent();
- Changed = true;
-
- // The RAUW can change PHIs that we already visited. Start over from the
- // beginning.
- PHISet.clear();
- I = BB->begin();
- }
- }
-
- return Changed;
-}
-
-/// enforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign,
- const DataLayout &DL) {
- assert(PrefAlign > Align);
-
- V = V->stripPointerCasts();
-
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- // TODO: ideally, computeKnownBits ought to have used
- // AllocaInst::getAlignment() in its computation already, making
- // the below max redundant. But, as it turns out,
- // stripPointerCasts recurses through infinite layers of bitcasts,
- // while computeKnownBits is not allowed to traverse more than 6
- // levels.
- Align = std::max(AI->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
-
- // If the preferred alignment is greater than the natural stack alignment
- // then don't round up. This avoids dynamic stack realignment.
- if (DL.exceedsNaturalStackAlignment(PrefAlign))
- return Align;
- AI->setAlignment(PrefAlign);
- return PrefAlign;
- }
-
- if (auto *GO = dyn_cast<GlobalObject>(V)) {
- // TODO: as above, this shouldn't be necessary.
- Align = std::max(GO->getAlignment(), Align);
- if (PrefAlign <= Align)
- return Align;
-
- // If there is a large requested alignment and we can, bump up the alignment
- // of the global. If the memory we set aside for the global may not be the
- // memory used by the final program then it is impossible for us to reliably
- // enforce the preferred alignment.
- if (!GO->canIncreaseAlignment())
- return Align;
-
- GO->setAlignment(PrefAlign);
- return PrefAlign;
- }
-
- return Align;
-}
-
-unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout &DL,
- const Instruction *CxtI,
- AssumptionCache *AC,
- const DominatorTree *DT) {
- assert(V->getType()->isPointerTy() &&
- "getOrEnforceKnownAlignment expects a pointer!");
-
- KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT);
- unsigned TrailZ = Known.countMinTrailingZeros();
-
- // Avoid trouble with ridiculously large TrailZ values, such as
- // those computed from a null pointer.
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
- unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ);
-
- // LLVM doesn't support alignments larger than this currently.
- Align = std::min(Align, +Value::MaximumAlignment);
-
- if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
-
- // We don't need to make any adjustment.
- return Align;
-}
-
-///===---------------------------------------------------------------------===//
-/// Dbg Intrinsic utilities
-///
-
-/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
- Instruction *I) {
- // Since we can't guarantee that the original dbg.declare instrinsic
- // is removed by LowerDbgDeclare(), we need to make sure that we are
- // not inserting the same dbg.value intrinsic over and over.
- BasicBlock::InstListType::iterator PrevI(I);
- if (PrevI != I->getParent()->getInstList().begin()) {
- --PrevI;
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
- if (DVI->getValue() == I->getOperand(0) &&
- DVI->getVariable() == DIVar &&
- DVI->getExpression() == DIExpr)
- return true;
- }
- return false;
-}
-
-/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar,
- DIExpression *DIExpr,
- PHINode *APN) {
- // Since we can't guarantee that the original dbg.declare instrinsic
- // is removed by LowerDbgDeclare(), we need to make sure that we are
- // not inserting the same dbg.value intrinsic over and over.
- SmallVector<DbgValueInst *, 1> DbgValues;
- findDbgValues(DbgValues, APN);
- for (auto *DVI : DbgValues) {
- assert(DVI->getValue() == APN);
- if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
- return true;
- }
- return false;
-}
-
-/// Check if the alloc size of \p ValTy is large enough to cover the variable
-/// (or fragment of the variable) described by \p DII.
-///
-/// This is primarily intended as a helper for the different
-/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is
-/// converted describes an alloca'd variable, so we need to use the
-/// alloc size of the value when doing the comparison. E.g. an i1 value will be
-/// identified as covering an n-bit fragment, if the store size of i1 is at
-/// least n bits.
-static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
- const DataLayout &DL = DII->getModule()->getDataLayout();
- uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
- if (auto FragmentSize = DII->getFragmentSizeInBits())
- return ValueSize >= *FragmentSize;
- // We can't always calculate the size of the DI variable (e.g. if it is a
- // VLA). Try to use the size of the alloca that the dbg intrinsic describes
- // intead.
- if (DII->isAddressOfVariable())
- if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
- if (auto FragmentSize = AI->getAllocationSizeInBits(DL))
- return ValueSize >= *FragmentSize;
- // Could not determine size of variable. Conservatively return false.
- return false;
-}
-
-/// Produce a DebugLoc to use for each dbg.declare/inst pair that are promoted
-/// to a dbg.value. Because no machine insts can come from debug intrinsics,
-/// only the scope and inlinedAt is significant. Zero line numbers are used in
-/// case this DebugLoc leaks into any adjacent instructions.
-static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
- // Original dbg.declare must have a location.
- DebugLoc DeclareLoc = DII->getDebugLoc();
- MDNode *Scope = DeclareLoc.getScope();
- DILocation *InlinedAt = DeclareLoc.getInlinedAt();
- // Produce an unknown location with the correct scope / inlinedAt fields.
- return DebugLoc::get(0, 0, Scope, InlinedAt);
-}
-
-/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
- StoreInst *SI, DIBuilder &Builder) {
- assert(DII->isAddressOfVariable());
- auto *DIVar = DII->getVariable();
- assert(DIVar && "Missing variable");
- auto *DIExpr = DII->getExpression();
- Value *DV = SI->getValueOperand();
-
- DebugLoc NewLoc = getDebugValueLoc(DII, SI);
-
- if (!valueCoversEntireFragment(DV->getType(), DII)) {
- // FIXME: If storing to a part of the variable described by the dbg.declare,
- // then we want to insert a dbg.value for the corresponding fragment.
- LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
- << *DII << '\n');
- // For now, when there is a store to parts of the variable (but we do not
- // know which part) we insert an dbg.value instrinsic to indicate that we
- // know nothing about the variable's content.
- DV = UndefValue::get(DV->getType());
- if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
- return;
- }
-
- if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
-}
-
-/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
- LoadInst *LI, DIBuilder &Builder) {
- auto *DIVar = DII->getVariable();
- auto *DIExpr = DII->getExpression();
- assert(DIVar && "Missing variable");
-
- if (LdStHasDebugValue(DIVar, DIExpr, LI))
- return;
-
- if (!valueCoversEntireFragment(LI->getType(), DII)) {
- // FIXME: If only referring to a part of the variable described by the
- // dbg.declare, then we want to insert a dbg.value for the corresponding
- // fragment.
- LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
- << *DII << '\n');
- return;
- }
-
- DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
-
- // We are now tracking the loaded value instead of the address. In the
- // future if multi-location support is added to the IR, it might be
- // preferable to keep tracking both the loaded value and the original
- // address in case the alloca can not be elided.
- Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
- DbgValue->insertAfter(LI);
-}
-
-/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
-/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
- PHINode *APN, DIBuilder &Builder) {
- auto *DIVar = DII->getVariable();
- auto *DIExpr = DII->getExpression();
- assert(DIVar && "Missing variable");
-
- if (PhiHasDebugValue(DIVar, DIExpr, APN))
- return;
-
- if (!valueCoversEntireFragment(APN->getType(), DII)) {
- // FIXME: If only referring to a part of the variable described by the
- // dbg.declare, then we want to insert a dbg.value for the corresponding
- // fragment.
- LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
- << *DII << '\n');
- return;
- }
-
- BasicBlock *BB = APN->getParent();
- auto InsertionPt = BB->getFirstInsertionPt();
-
- DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
-
- // The block may be a catchswitch block, which does not have a valid
- // insertion point.
- // FIXME: Insert dbg.value markers in the successors when appropriate.
- if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
-}
-
-/// Determine whether this alloca is either a VLA or an array.
-static bool isArray(AllocaInst *AI) {
- return AI->isArrayAllocation() ||
- AI->getType()->getElementType()->isArrayTy();
-}
-
-/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
-/// of llvm.dbg.value intrinsics.
-bool llvm::LowerDbgDeclare(Function &F) {
- DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
- SmallVector<DbgDeclareInst *, 4> Dbgs;
- for (auto &FI : F)
- for (Instruction &BI : FI)
- if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
- Dbgs.push_back(DDI);
-
- if (Dbgs.empty())
- return false;
-
- for (auto &I : Dbgs) {
- DbgDeclareInst *DDI = I;
- AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
- // If this is an alloca for a scalar variable, insert a dbg.value
- // at each load and store to the alloca and erase the dbg.declare.
- // The dbg.values allow tracking a variable even if it is not
- // stored on the stack, while the dbg.declare can only describe
- // the stack slot (and at a lexical-scope granularity). Later
- // passes will attempt to elide the stack slot.
- if (!AI || isArray(AI))
- continue;
-
- // A volatile load/store means that the alloca can't be elided anyway.
- if (llvm::any_of(AI->users(), [](User *U) -> bool {
- if (LoadInst *LI = dyn_cast<LoadInst>(U))
- return LI->isVolatile();
- if (StoreInst *SI = dyn_cast<StoreInst>(U))
- return SI->isVolatile();
- return false;
- }))
- continue;
-
- for (auto &AIUse : AI->uses()) {
- User *U = AIUse.getUser();
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (AIUse.getOperandNo() == 1)
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // This is a call by-value or some other instruction that takes a
- // pointer to the variable. Insert a *value* intrinsic that describes
- // the variable by dereferencing the alloca.
- DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
- auto *DerefExpr =
- DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
- CI);
- }
- }
- DDI->eraseFromParent();
- }
- return true;
-}
-
-/// Propagate dbg.value intrinsics through the newly inserted PHIs.
-void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
- SmallVectorImpl<PHINode *> &InsertedPHIs) {
- assert(BB && "No BasicBlock to clone dbg.value(s) from.");
- if (InsertedPHIs.size() == 0)
- return;
-
- // Map existing PHI nodes to their dbg.values.
- ValueToValueMapTy DbgValueMap;
- for (auto &I : *BB) {
- if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
- if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
- DbgValueMap.insert({Loc, DbgII});
- }
- }
- if (DbgValueMap.size() == 0)
- return;
-
- // Then iterate through the new PHIs and look to see if they use one of the
- // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
- // propagate the info through the new PHI.
- LLVMContext &C = BB->getContext();
- for (auto PHI : InsertedPHIs) {
- BasicBlock *Parent = PHI->getParent();
- // Avoid inserting an intrinsic into an EH block.
- if (Parent->getFirstNonPHI()->isEHPad())
- continue;
- auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
- for (auto VI : PHI->operand_values()) {
- auto V = DbgValueMap.find(VI);
- if (V != DbgValueMap.end()) {
- auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
- Instruction *NewDbgII = DbgII->clone();
- NewDbgII->setOperand(0, PhiMAV);
- auto InsertionPt = Parent->getFirstInsertionPt();
- assert(InsertionPt != Parent->end() && "Ill-formed basic block");
- NewDbgII->insertBefore(&*InsertionPt);
- }
- }
- }
-}
-
-/// Finds all intrinsics declaring local variables as living in the memory that
-/// 'V' points to. This may include a mix of dbg.declare and
-/// dbg.addr intrinsics.
-TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return {};
- auto *L = LocalAsMetadata::getIfExists(V);
- if (!L)
- return {};
- auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L);
- if (!MDV)
- return {};
-
- TinyPtrVector<DbgVariableIntrinsic *> Declares;
- for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
- if (DII->isAddressOfVariable())
- Declares.push_back(DII);
- }
-
- return Declares;
-}
-
-void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return;
- if (auto *L = LocalAsMetadata::getIfExists(V))
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
- for (User *U : MDV->users())
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DbgValues.push_back(DVI);
-}
-
-void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
- Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return;
- if (auto *L = LocalAsMetadata::getIfExists(V))
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
- for (User *U : MDV->users())
- if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
- DbgUsers.push_back(DII);
-}
-
-bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
- Instruction *InsertBefore, DIBuilder &Builder,
- uint8_t DIExprFlags, int Offset) {
- auto DbgAddrs = FindDbgAddrUses(Address);
- for (DbgVariableIntrinsic *DII : DbgAddrs) {
- DebugLoc Loc = DII->getDebugLoc();
- auto *DIVar = DII->getVariable();
- auto *DIExpr = DII->getExpression();
- assert(DIVar && "Missing variable");
- DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
- // Insert llvm.dbg.declare immediately before InsertBefore, and remove old
- // llvm.dbg.declare.
- Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
- if (DII == InsertBefore)
- InsertBefore = InsertBefore->getNextNode();
- DII->eraseFromParent();
- }
- return !DbgAddrs.empty();
-}
-
-bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, uint8_t DIExprFlags,
- int Offset) {
- return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
- DIExprFlags, Offset);
-}
-
-static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
- DIBuilder &Builder, int Offset) {
- DebugLoc Loc = DVI->getDebugLoc();
- auto *DIVar = DVI->getVariable();
- auto *DIExpr = DVI->getExpression();
- assert(DIVar && "Missing variable");
-
- // This is an alloca-based llvm.dbg.value. The first thing it should do with
- // the alloca pointer is dereference it. Otherwise we don't know how to handle
- // it and give up.
- if (!DIExpr || DIExpr->getNumElements() < 1 ||
- DIExpr->getElement(0) != dwarf::DW_OP_deref)
- return;
-
- // Insert the offset immediately after the first deref.
- // We could just change the offset argument of dbg.value, but it's unsigned...
- if (Offset) {
- SmallVector<uint64_t, 4> Ops;
- Ops.push_back(dwarf::DW_OP_deref);
- DIExpression::appendOffset(Ops, Offset);
- Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
- DIExpr = Builder.createExpression(Ops);
- }
-
- Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI);
- DVI->eraseFromParent();
-}
-
-void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, int Offset) {
- if (auto *L = LocalAsMetadata::getIfExists(AI))
- if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
- for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) {
- Use &U = *UI++;
- if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
- replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
- }
-}
-
-/// Wrap \p V in a ValueAsMetadata instance.
-static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
- return MetadataAsValue::get(C, ValueAsMetadata::get(V));
-}
-
-bool llvm::salvageDebugInfo(Instruction &I) {
- SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
- if (DbgUsers.empty())
- return false;
-
- return salvageDebugInfoForDbgValues(I, DbgUsers);
-}
-
-bool llvm::salvageDebugInfoForDbgValues(
- Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
- auto &Ctx = I.getContext();
- auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
-
- for (auto *DII : DbgUsers) {
- // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
- // are implicitly pointing out the value as a DWARF memory location
- // description.
- bool StackValue = isa<DbgValueInst>(DII);
-
- DIExpression *DIExpr =
- salvageDebugInfoImpl(I, DII->getExpression(), StackValue);
-
- // salvageDebugInfoImpl should fail on examining the first element of
- // DbgUsers, or none of them.
- if (!DIExpr)
- return false;
-
- DII->setOperand(0, wrapMD(I.getOperand(0)));
- DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- }
-
- return true;
-}
-
-DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
- DIExpression *SrcDIExpr,
- bool WithStackValue) {
- auto &M = *I.getModule();
- auto &DL = M.getDataLayout();
-
- // Apply a vector of opcodes to the source DIExpression.
- auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
- DIExpression *DIExpr = SrcDIExpr;
- if (!Ops.empty()) {
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
- }
- return DIExpr;
- };
-
- // Apply the given offset to the source DIExpression.
- auto applyOffset = [&](uint64_t Offset) -> DIExpression * {
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- return doSalvage(Ops);
- };
-
- // initializer-list helper for applying operators to the source DIExpression.
- auto applyOps =
- [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
- SmallVector<uint64_t, 8> Ops(Opcodes);
- return doSalvage(Ops);
- };
-
- if (auto *CI = dyn_cast<CastInst>(&I)) {
- // No-op casts and zexts are irrelevant for debug info.
- if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
- return SrcDIExpr;
- return nullptr;
- } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- unsigned BitWidth =
- M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
- // Rewrite a constant GEP into a DIExpression.
- APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
- return applyOffset(Offset.getSExtValue());
- } else {
- return nullptr;
- }
- } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
- // Rewrite binary operations with constant integer operands.
- auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
- if (!ConstInt || ConstInt->getBitWidth() > 64)
- return nullptr;
-
- uint64_t Val = ConstInt->getSExtValue();
- switch (BI->getOpcode()) {
- case Instruction::Add:
- return applyOffset(Val);
- case Instruction::Sub:
- return applyOffset(-int64_t(Val));
- case Instruction::Mul:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
- case Instruction::SDiv:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
- case Instruction::SRem:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
- case Instruction::Or:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
- case Instruction::And:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
- case Instruction::Xor:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
- case Instruction::Shl:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
- case Instruction::LShr:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
- case Instruction::AShr:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
- default:
- // TODO: Salvage constants from each kind of binop we know about.
- return nullptr;
- }
- // *Not* to do: we should not attempt to salvage load instructions,
- // because the validity and lifetime of a dbg.value containing
- // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
- }
- return nullptr;
-}
-
-/// A replacement for a dbg.value expression.
-using DbgValReplacement = Optional<DIExpression *>;
-
-/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
-/// possibly moving/deleting users to prevent use-before-def. Returns true if
-/// changes are made.
-static bool rewriteDebugUsers(
- Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
- // Find debug users of From.
- SmallVector<DbgVariableIntrinsic *, 1> Users;
- findDbgUsers(Users, &From);
- if (Users.empty())
- return false;
-
- // Prevent use-before-def of To.
- bool Changed = false;
- SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
- if (isa<Instruction>(&To)) {
- bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
-
- for (auto *DII : Users) {
- // It's common to see a debug user between From and DomPoint. Move it
- // after DomPoint to preserve the variable update without any reordering.
- if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) {
- LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n');
- DII->moveAfter(&DomPoint);
- Changed = true;
-
- // Users which otherwise aren't dominated by the replacement value must
- // be salvaged or deleted.
- } else if (!DT.dominates(&DomPoint, DII)) {
- DeleteOrSalvage.insert(DII);
- }
- }
- }
-
- // Update debug users without use-before-def risk.
- for (auto *DII : Users) {
- if (DeleteOrSalvage.count(DII))
- continue;
-
- LLVMContext &Ctx = DII->getContext();
- DbgValReplacement DVR = RewriteExpr(*DII);
- if (!DVR)
- continue;
-
- DII->setOperand(0, wrapValueInMetadata(Ctx, &To));
- DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR));
- LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
- Changed = true;
- }
-
- if (!DeleteOrSalvage.empty()) {
- // Try to salvage the remaining debug users.
- Changed |= salvageDebugInfo(From);
-
- // Delete the debug users which weren't salvaged.
- for (auto *DII : DeleteOrSalvage) {
- if (DII->getVariableLocation() == &From) {
- LLVM_DEBUG(dbgs() << "Erased UseBeforeDef: " << *DII << '\n');
- DII->eraseFromParent();
- Changed = true;
- }
- }
- }
-
- return Changed;
-}
-
-/// Check if a bitcast between a value of type \p FromTy to type \p ToTy would
-/// losslessly preserve the bits and semantics of the value. This predicate is
-/// symmetric, i.e swapping \p FromTy and \p ToTy should give the same result.
-///
-/// Note that Type::canLosslesslyBitCastTo is not suitable here because it
-/// allows semantically unequivalent bitcasts, such as <2 x i64> -> <4 x i32>,
-/// and also does not allow lossless pointer <-> integer conversions.
-static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy,
- Type *ToTy) {
- // Trivially compatible types.
- if (FromTy == ToTy)
- return true;
-
- // Handle compatible pointer <-> integer conversions.
- if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) {
- bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy);
- bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) &&
- !DL.isNonIntegralPointerType(ToTy);
- return SameSize && LosslessConversion;
- }
-
- // TODO: This is not exhaustive.
- return false;
-}
-
-bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
- Instruction &DomPoint, DominatorTree &DT) {
- // Exit early if From has no debug users.
- if (!From.isUsedByMetadata())
- return false;
-
- assert(&From != &To && "Can't replace something with itself");
-
- Type *FromTy = From.getType();
- Type *ToTy = To.getType();
-
- auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
- return DII.getExpression();
- };
-
- // Handle no-op conversions.
- Module &M = *From.getModule();
- const DataLayout &DL = M.getDataLayout();
- if (isBitCastSemanticsPreserving(DL, FromTy, ToTy))
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
-
- // Handle integer-to-integer widening and narrowing.
- // FIXME: Use DW_OP_convert when it's available everywhere.
- if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) {
- uint64_t FromBits = FromTy->getPrimitiveSizeInBits();
- uint64_t ToBits = ToTy->getPrimitiveSizeInBits();
- assert(FromBits != ToBits && "Unexpected no-op conversion");
-
- // When the width of the result grows, assume that a debugger will only
- // access the low `FromBits` bits when inspecting the source variable.
- if (FromBits < ToBits)
- return rewriteDebugUsers(From, To, DomPoint, DT, Identity);
-
- // The width of the result has shrunk. Use sign/zero extension to describe
- // the source variable's high bits.
- auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
- DILocalVariable *Var = DII.getVariable();
-
- // Without knowing signedness, sign/zero extension isn't possible.
- auto Signedness = Var->getSignedness();
- if (!Signedness)
- return None;
-
- bool Signed = *Signedness == DIBasicType::Signedness::Signed;
- dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
- SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
- dwarf::DW_OP_LLVM_convert, FromBits, TK});
- return DIExpression::appendToStack(DII.getExpression(), Ops);
- };
- return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
- }
-
- // TODO: Floating-point conversions, vectors.
- return false;
-}
-
-unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
- unsigned NumDeadInst = 0;
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
- while (EndInst != &BB->front()) {
- // Delete the next to last instruction.
- Instruction *Inst = &*--EndInst->getIterator();
- if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
- EndInst = Inst;
- continue;
- }
- if (!isa<DbgInfoIntrinsic>(Inst))
- ++NumDeadInst;
- Inst->eraseFromParent();
- }
- return NumDeadInst;
-}
-
-unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DomTreeUpdater *DTU,
- MemorySSAUpdater *MSSAU) {
- BasicBlock *BB = I->getParent();
- std::vector <DominatorTree::UpdateType> Updates;
-
- if (MSSAU)
- MSSAU->changeToUnreachable(I);
-
- // Loop over all of the successors, removing BB's entry from any PHI
- // nodes.
- if (DTU)
- Updates.reserve(BB->getTerminator()->getNumSuccessors());
- for (BasicBlock *Successor : successors(BB)) {
- Successor->removePredecessor(BB, PreserveLCSSA);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
- }
- // Insert a call to llvm.trap right before this. This turns the undefined
- // behavior into a hard fail instead of falling through into random code.
- if (UseLLVMTrap) {
- Function *TrapFn =
- Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
- CallTrap->setDebugLoc(I->getDebugLoc());
- }
- auto *UI = new UnreachableInst(I->getContext(), I);
- UI->setDebugLoc(I->getDebugLoc());
-
- // All instructions after this are dead.
- unsigned NumInstrsRemoved = 0;
- BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
- while (BBI != BBE) {
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BB->getInstList().erase(BBI++);
- ++NumInstrsRemoved;
- }
- if (DTU)
- DTU->applyUpdatesPermissive(Updates);
- return NumInstrsRemoved;
-}
-
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
- SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
- SmallVector<OperandBundleDef, 1> OpBundles;
- II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCall = CallInst::Create(
- II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- NewCall->copyMetadata(*II);
- II->replaceAllUsesWith(NewCall);
-
- // Follow the call by a branch to the normal destination.
- BasicBlock *NormalDestBB = II->getNormalDest();
- BranchInst::Create(NormalDestBB, II);
-
- // Update PHI nodes in the unwind destination
- BasicBlock *BB = II->getParent();
- BasicBlock *UnwindDestBB = II->getUnwindDest();
- UnwindDestBB->removePredecessor(BB);
- II->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
-}
-
-BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
- BasicBlock *UnwindEdge) {
- BasicBlock *BB = CI->getParent();
-
- // Convert this function call into an invoke instruction. First, split the
- // basic block.
- BasicBlock *Split =
- BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
-
- // Delete the unconditional branch inserted by splitBasicBlock
- BB->getInstList().pop_back();
-
- // Create the new invoke instruction.
- SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
- SmallVector<OperandBundleDef, 1> OpBundles;
-
- CI->getOperandBundlesAsDefs(OpBundles);
-
- // Note: we're round tripping operand bundles through memory here, and that
- // can potentially be avoided with a cleverer API design that we do not have
- // as of this time.
-
- InvokeInst *II =
- InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split,
- UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
- II->setDebugLoc(CI->getDebugLoc());
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
-
- // Make sure that anything using the call now uses the invoke! This also
- // updates the CallGraph if present, because it uses a WeakTrackingVH.
- CI->replaceAllUsesWith(II);
-
- // Delete the original call
- Split->getInstList().pop_front();
- return Split;
-}
-
-static bool markAliveBlocks(Function &F,
- SmallPtrSetImpl<BasicBlock *> &Reachable,
- DomTreeUpdater *DTU = nullptr) {
- SmallVector<BasicBlock*, 128> Worklist;
- BasicBlock *BB = &F.front();
- Worklist.push_back(BB);
- Reachable.insert(BB);
- bool Changed = false;
- do {
- BB = Worklist.pop_back_val();
-
- // Do a quick scan of the basic block, turning any obviously unreachable
- // instructions into LLVM unreachable insts. The instruction combining pass
- // canonicalizes unreachable insts into stores to null or undef.
- for (Instruction &I : *BB) {
- if (auto *CI = dyn_cast<CallInst>(&I)) {
- Value *Callee = CI->getCalledValue();
- // Handle intrinsic calls.
- if (Function *F = dyn_cast<Function>(Callee)) {
- auto IntrinsicID = F->getIntrinsicID();
- // Assumptions that are known to be false are equivalent to
- // unreachable. Also, if the condition is undefined, then we make the
- // choice most beneficial to the optimizer, and choose that to also be
- // unreachable.
- if (IntrinsicID == Intrinsic::assume) {
- if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI, false, false, DTU);
- Changed = true;
- break;
- }
- } else if (IntrinsicID == Intrinsic::experimental_guard) {
- // A call to the guard intrinsic bails out of the current
- // compilation unit if the predicate passed to it is false. If the
- // predicate is a constant false, then we know the guard will bail
- // out of the current compile unconditionally, so all code following
- // it is dead.
- //
- // Note: unlike in llvm.assume, it is not "obviously profitable" for
- // guards to treat `undef` as `false` since a guard on `undef` can
- // still be useful for widening.
- if (match(CI->getArgOperand(0), m_Zero()))
- if (!isa<UnreachableInst>(CI->getNextNode())) {
- changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
- false, DTU);
- Changed = true;
- break;
- }
- }
- } else if ((isa<ConstantPointerNull>(Callee) &&
- !NullPointerIsDefined(CI->getFunction())) ||
- isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU);
- Changed = true;
- break;
- }
- if (CI->doesNotReturn() && !CI->isMustTailCall()) {
- // If we found a call to a no-return function, insert an unreachable
- // instruction after it. Make sure there isn't *already* one there
- // though.
- if (!isa<UnreachableInst>(CI->getNextNode())) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, false, DTU);
- Changed = true;
- }
- break;
- }
- } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
- // Store to undef and store to null are undefined and used to signal
- // that they should be changed to unreachable by passes that can't
- // modify the CFG.
-
- // Don't touch volatile stores.
- if (SI->isVolatile()) continue;
-
- Value *Ptr = SI->getOperand(1);
-
- if (isa<UndefValue>(Ptr) ||
- (isa<ConstantPointerNull>(Ptr) &&
- !NullPointerIsDefined(SI->getFunction(),
- SI->getPointerAddressSpace()))) {
- changeToUnreachable(SI, true, false, DTU);
- Changed = true;
- break;
- }
- }
- }
-
- Instruction *Terminator = BB->getTerminator();
- if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- Value *Callee = II->getCalledValue();
- if ((isa<ConstantPointerNull>(Callee) &&
- !NullPointerIsDefined(BB->getParent())) ||
- isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true, false, DTU);
- Changed = true;
- } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
- if (II->use_empty() && II->onlyReadsMemory()) {
- // jump to the normal destination branch.
- BasicBlock *NormalDestBB = II->getNormalDest();
- BasicBlock *UnwindDestBB = II->getUnwindDest();
- BranchInst::Create(NormalDestBB, II);
- UnwindDestBB->removePredecessor(II->getParent());
- II->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, BB, UnwindDestBB}});
- } else
- changeToCall(II, DTU);
- Changed = true;
- }
- } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
- // Remove catchpads which cannot be reached.
- struct CatchPadDenseMapInfo {
- static CatchPadInst *getEmptyKey() {
- return DenseMapInfo<CatchPadInst *>::getEmptyKey();
- }
-
- static CatchPadInst *getTombstoneKey() {
- return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
- }
-
- static unsigned getHashValue(CatchPadInst *CatchPad) {
- return static_cast<unsigned>(hash_combine_range(
- CatchPad->value_op_begin(), CatchPad->value_op_end()));
- }
-
- static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
- return LHS == RHS;
- return LHS->isIdenticalTo(RHS);
- }
- };
-
- // Set of unique CatchPads.
- SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
- CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
- HandlerSet;
- detail::DenseSetEmpty Empty;
- for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
- E = CatchSwitch->handler_end();
- I != E; ++I) {
- BasicBlock *HandlerBB = *I;
- auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
- if (!HandlerSet.insert({CatchPad, Empty}).second) {
- CatchSwitch->removeHandler(I);
- --I;
- --E;
- Changed = true;
- }
- }
- }
-
- Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
- for (BasicBlock *Successor : successors(BB))
- if (Reachable.insert(Successor).second)
- Worklist.push_back(Successor);
- } while (!Worklist.empty());
- return Changed;
-}
-
-void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
- Instruction *TI = BB->getTerminator();
-
- if (auto *II = dyn_cast<InvokeInst>(TI)) {
- changeToCall(II, DTU);
- return;
- }
-
- Instruction *NewTI;
- BasicBlock *UnwindDest;
-
- if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
- NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
- UnwindDest = CRI->getUnwindDest();
- } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
- auto *NewCatchSwitch = CatchSwitchInst::Create(
- CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
- CatchSwitch->getName(), CatchSwitch);
- for (BasicBlock *PadBB : CatchSwitch->handlers())
- NewCatchSwitch->addHandler(PadBB);
-
- NewTI = NewCatchSwitch;
- UnwindDest = CatchSwitch->getUnwindDest();
- } else {
- llvm_unreachable("Could not find unwind successor");
- }
-
- NewTI->takeName(TI);
- NewTI->setDebugLoc(TI->getDebugLoc());
- UnwindDest->removePredecessor(BB);
- TI->replaceAllUsesWith(NewTI);
- TI->eraseFromParent();
- if (DTU)
- DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
-}
-
-/// removeUnreachableBlocks - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
-/// after modifying the CFG.
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DomTreeUpdater *DTU,
- MemorySSAUpdater *MSSAU) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
- bool Changed = markAliveBlocks(F, Reachable, DTU);
-
- // If there are unreachable blocks in the CFG...
- if (Reachable.size() == F.size())
- return Changed;
-
- assert(Reachable.size() < F.size());
- NumRemoved += F.size()-Reachable.size();
-
- SmallSetVector<BasicBlock *, 8> DeadBlockSet;
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
- auto *BB = &*I;
- if (Reachable.count(BB))
- continue;
- DeadBlockSet.insert(BB);
- }
-
- if (MSSAU)
- MSSAU->removeBlocks(DeadBlockSet);
-
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DTU and LVI if available.
- std::vector<DominatorTree::UpdateType> Updates;
- for (auto *BB : DeadBlockSet) {
- for (BasicBlock *Successor : successors(BB)) {
- if (!DeadBlockSet.count(Successor))
- Successor->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Successor});
- }
- if (LVI)
- LVI->eraseBlock(BB);
- BB->dropAllReferences();
- }
- for (Function::iterator I = ++F.begin(); I != F.end();) {
- auto *BB = &*I;
- if (Reachable.count(BB)) {
- ++I;
- continue;
- }
- if (DTU) {
- // Remove the terminator of BB to clear the successor list of BB.
- if (BB->getTerminator())
- BB->getInstList().pop_back();
- new UnreachableInst(BB->getContext(), BB);
- assert(succ_empty(BB) && "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
- ++I;
- } else {
- I = F.getBasicBlockList().erase(I);
- }
- }
-
- if (DTU) {
- DTU->applyUpdatesPermissive(Updates);
- bool Deleted = false;
- for (auto *BB : DeadBlockSet) {
- if (DTU->isBBPendingDeletion(BB))
- --NumRemoved;
- else
- Deleted = true;
- DTU->deleteBB(BB);
- }
- if (!Deleted)
- return false;
- }
- return true;
-}
-
-void llvm::combineMetadata(Instruction *K, const Instruction *J,
- ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
- SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
- K->dropUnknownNonDebugMetadata(KnownIDs);
- K->getAllMetadataOtherThanDebugLoc(Metadata);
- for (const auto &MD : Metadata) {
- unsigned Kind = MD.first;
- MDNode *JMD = J->getMetadata(Kind);
- MDNode *KMD = MD.second;
-
- switch (Kind) {
- default:
- K->setMetadata(Kind, nullptr); // Remove unknown metadata
- break;
- case LLVMContext::MD_dbg:
- llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
- case LLVMContext::MD_tbaa:
- K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
- break;
- case LLVMContext::MD_alias_scope:
- K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
- break;
- case LLVMContext::MD_noalias:
- case LLVMContext::MD_mem_parallel_loop_access:
- K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
- break;
- case LLVMContext::MD_access_group:
- K->setMetadata(LLVMContext::MD_access_group,
- intersectAccessGroups(K, J));
- break;
- case LLVMContext::MD_range:
-
- // If K does move, use most generic range. Otherwise keep the range of
- // K.
- if (DoesKMove)
- // FIXME: If K does move, we should drop the range info and nonnull.
- // Currently this function is used with DoesKMove in passes
- // doing hoisting/sinking and the current behavior of using the
- // most generic range is correct in those cases.
- K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
- break;
- case LLVMContext::MD_fpmath:
- K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
- break;
- case LLVMContext::MD_invariant_load:
- // Only set the !invariant.load if it is present in both instructions.
- K->setMetadata(Kind, JMD);
- break;
- case LLVMContext::MD_nonnull:
- // If K does move, keep nonull if it is present in both instructions.
- if (DoesKMove)
- K->setMetadata(Kind, JMD);
- break;
- case LLVMContext::MD_invariant_group:
- // Preserve !invariant.group in K.
- break;
- case LLVMContext::MD_align:
- K->setMetadata(Kind,
- MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
- break;
- case LLVMContext::MD_dereferenceable:
- case LLVMContext::MD_dereferenceable_or_null:
- K->setMetadata(Kind,
- MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
- break;
- }
- }
- // Set !invariant.group from J if J has it. If both instructions have it
- // then we will just pick it from J - even when they are different.
- // Also make sure that K is load or store - f.e. combining bitcast with load
- // could produce bitcast with invariant.group metadata, which is invalid.
- // FIXME: we should try to preserve both invariant.group md if they are
- // different, but right now instruction can only have one invariant.group.
- if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
- if (isa<LoadInst>(K) || isa<StoreInst>(K))
- K->setMetadata(LLVMContext::MD_invariant_group, JMD);
-}
-
-void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
- bool KDominatesJ) {
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group, LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_access_group};
- combineMetadata(K, J, KnownIDs, KDominatesJ);
-}
-
-void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
- auto *ReplInst = dyn_cast<Instruction>(Repl);
- if (!ReplInst)
- return;
-
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- // Note that if 'I' is a load being replaced by some operation,
- // for example, by an arithmetic operation, then andIRFlags()
- // would just erase all math flags from the original arithmetic
- // operation, which is clearly not wanted and not needed.
- if (!isa<LoadInst>(I))
- ReplInst->andIRFlags(I);
-
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
- LLVMContext::MD_access_group};
- combineMetadata(ReplInst, I, KnownIDs, false);
-}
-
-template <typename RootType, typename DominatesFn>
-static unsigned replaceDominatedUsesWith(Value *From, Value *To,
- const RootType &Root,
- const DominatesFn &Dominates) {
- assert(From->getType() == To->getType());
-
- unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
- if (!Dominates(Root, U))
- continue;
- U.set(To);
- LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
- << "' as " << *To << " in " << *U << "\n");
- ++Count;
- }
- return Count;
-}
-
-unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
- assert(From->getType() == To->getType());
- auto *BB = From->getParent();
- unsigned Count = 0;
-
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
- auto *I = cast<Instruction>(U.getUser());
- if (I->getParent() == BB)
- continue;
- U.set(To);
- ++Count;
- }
- return Count;
-}
-
-unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
- DominatorTree &DT,
- const BasicBlockEdge &Root) {
- auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) {
- return DT.dominates(Root, U);
- };
- return ::replaceDominatedUsesWith(From, To, Root, Dominates);
-}
-
-unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
- DominatorTree &DT,
- const BasicBlock *BB) {
- auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) {
- auto *I = cast<Instruction>(U.getUser())->getParent();
- return DT.properlyDominates(BB, I);
- };
- return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
-}
-
-bool llvm::callsGCLeafFunction(const CallBase *Call,
- const TargetLibraryInfo &TLI) {
- // Check if the function is specifically marked as a gc leaf function.
- if (Call->hasFnAttr("gc-leaf-function"))
- return true;
- if (const Function *F = Call->getCalledFunction()) {
- if (F->hasFnAttribute("gc-leaf-function"))
- return true;
-
- if (auto IID = F->getIntrinsicID())
- // Most LLVM intrinsics do not take safepoints.
- return IID != Intrinsic::experimental_gc_statepoint &&
- IID != Intrinsic::experimental_deoptimize;
- }
-
- // Lib calls can be materialized by some passes, and won't be
- // marked as 'gc-leaf-function.' All available Libcalls are
- // GC-leaf.
- LibFunc LF;
- if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) {
- return TLI.has(LF);
- }
-
- return false;
-}
-
-void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
- LoadInst &NewLI) {
- auto *NewTy = NewLI.getType();
-
- // This only directly applies if the new type is also a pointer.
- if (NewTy->isPointerTy()) {
- NewLI.setMetadata(LLVMContext::MD_nonnull, N);
- return;
- }
-
- // The only other translation we can do is to integral loads with !range
- // metadata.
- if (!NewTy->isIntegerTy())
- return;
-
- MDBuilder MDB(NewLI.getContext());
- const Value *Ptr = OldLI.getPointerOperand();
- auto *ITy = cast<IntegerType>(NewTy);
- auto *NullInt = ConstantExpr::getPtrToInt(
- ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
- auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
- NewLI.setMetadata(LLVMContext::MD_range,
- MDB.createRange(NonNullInt, NullInt));
-}
-
-void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
- MDNode *N, LoadInst &NewLI) {
- auto *NewTy = NewLI.getType();
-
- // Give up unless it is converted to a pointer where there is a single very
- // valuable mapping we can do reliably.
- // FIXME: It would be nice to propagate this in more ways, but the type
- // conversions make it hard.
- if (!NewTy->isPointerTy())
- return;
-
- unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy);
- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
- MDNode *NN = MDNode::get(OldLI.getContext(), None);
- NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
- }
-}
-
-void llvm::dropDebugUsers(Instruction &I) {
- SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, &I);
- for (auto *DII : DbgUsers)
- DII->eraseFromParent();
-}
-
-void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
- BasicBlock *BB) {
- // Since we are moving the instructions out of its basic block, we do not
- // retain their original debug locations (DILocations) and debug intrinsic
- // instructions.
- //
- // Doing so would degrade the debugging experience and adversely affect the
- // accuracy of profiling information.
- //
- // Currently, when hoisting the instructions, we take the following actions:
- // - Remove their debug intrinsic instructions.
- // - Set their debug locations to the values from the insertion point.
- //
- // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
- // need to be deleted, is because there will not be any instructions with a
- // DILocation in either branch left after performing the transformation. We
- // can only insert a dbg.value after the two branches are joined again.
- //
- // See PR38762, PR39243 for more details.
- //
- // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
- // encode predicated DIExpressions that yield different results on different
- // code paths.
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
- Instruction *I = &*II;
- I->dropUnknownNonDebugMetadata();
- if (I->isUsedByMetadata())
- dropDebugUsers(*I);
- if (isa<DbgInfoIntrinsic>(I)) {
- // Remove DbgInfo Intrinsics.
- II = I->eraseFromParent();
- continue;
- }
- I->setDebugLoc(InsertPt->getDebugLoc());
- ++II;
- }
- DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(),
- BB->begin(),
- BB->getTerminator()->getIterator());
-}
-
-namespace {
-
-/// A potential constituent of a bitreverse or bswap expression. See
-/// collectBitParts for a fuller explanation.
-struct BitPart {
- BitPart(Value *P, unsigned BW) : Provider(P) {
- Provenance.resize(BW);
- }
-
- /// The Value that this is a bitreverse/bswap of.
- Value *Provider;
-
- /// The "provenance" of each bit. Provenance[A] = B means that bit A
- /// in Provider becomes bit B in the result of this expression.
- SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
-
- enum { Unset = -1 };
-};
-
-} // end anonymous namespace
-
-/// Analyze the specified subexpression and see if it is capable of providing
-/// pieces of a bswap or bitreverse. The subexpression provides a potential
-/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
-/// the output of the expression came from a corresponding bit in some other
-/// value. This function is recursive, and the end result is a mapping of
-/// bitnumber to bitnumber. It is the caller's responsibility to validate that
-/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
-///
-/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know
-/// that the expression deposits the low byte of %X into the high byte of the
-/// result and that all other bits are zero. This expression is accepted and a
-/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
-/// [0-7].
-///
-/// To avoid revisiting values, the BitPart results are memoized into the
-/// provided map. To avoid unnecessary copying of BitParts, BitParts are
-/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
-/// store BitParts objects, not pointers. As we need the concept of a nullptr
-/// BitParts (Value has been analyzed and the analysis failed), we an Optional
-/// type instead to provide the same functionality.
-///
-/// Because we pass around references into \c BPS, we must use a container that
-/// does not invalidate internal references (std::map instead of DenseMap).
-static const Optional<BitPart> &
-collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
- std::map<Value *, Optional<BitPart>> &BPS, int Depth) {
- auto I = BPS.find(V);
- if (I != BPS.end())
- return I->second;
-
- auto &Result = BPS[V] = None;
- auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
-
- // Prevent stack overflow by limiting the recursion depth
- if (Depth == BitPartRecursionMaxDepth) {
- LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
- return Result;
- }
-
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- if (!A || !B)
- return Result;
-
- // Try and merge the two together.
- if (!A->Provider || A->Provider != B->Provider)
- return Result;
-
- Result = BitPart(A->Provider, BitWidth);
- for (unsigned i = 0; i < A->Provenance.size(); ++i) {
- if (A->Provenance[i] != BitPart::Unset &&
- B->Provenance[i] != BitPart::Unset &&
- A->Provenance[i] != B->Provenance[i])
- return Result = None;
-
- if (A->Provenance[i] == BitPart::Unset)
- Result->Provenance[i] = B->Provenance[i];
- else
- Result->Provenance[i] = A->Provenance[i];
- }
-
- return Result;
- }
-
- // If this is a logical shift by a constant, recurse then shift the result.
- if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
- unsigned BitShift =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
- // Ensure the shift amount is defined.
- if (BitShift > BitWidth)
- return Result;
-
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- if (!Res)
- return Result;
- Result = Res;
-
- // Perform the "shift" on BitProvenance.
- auto &P = Result->Provenance;
- if (I->getOpcode() == Instruction::Shl) {
- P.erase(std::prev(P.end(), BitShift), P.end());
- P.insert(P.begin(), BitShift, BitPart::Unset);
- } else {
- P.erase(P.begin(), std::next(P.begin(), BitShift));
- P.insert(P.end(), BitShift, BitPart::Unset);
- }
-
- return Result;
- }
-
- // If this is a logical 'and' with a mask that clears bits, recurse then
- // unset the appropriate bits.
- if (I->getOpcode() == Instruction::And &&
- isa<ConstantInt>(I->getOperand(1))) {
- APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
- const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-
- // Check that the mask allows a multiple of 8 bits for a bswap, for an
- // early exit.
- unsigned NumMaskedBits = AndMask.countPopulation();
- if (!MatchBitReversals && NumMaskedBits % 8 != 0)
- return Result;
-
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- if (!Res)
- return Result;
- Result = Res;
-
- for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
- // If the AndMask is zero for this bit, clear the bit.
- if ((AndMask & Bit) == 0)
- Result->Provenance[i] = BitPart::Unset;
- return Result;
- }
-
- // If this is a zext instruction zero extend the result.
- if (I->getOpcode() == Instruction::ZExt) {
- auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS, Depth + 1);
- if (!Res)
- return Result;
-
- Result = BitPart(Res->Provider, BitWidth);
- auto NarrowBitWidth =
- cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth();
- for (unsigned i = 0; i < NarrowBitWidth; ++i)
- Result->Provenance[i] = Res->Provenance[i];
- for (unsigned i = NarrowBitWidth; i < BitWidth; ++i)
- Result->Provenance[i] = BitPart::Unset;
- return Result;
- }
- }
-
- // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
- // the input value to the bswap/bitreverse.
- Result = BitPart(V, BitWidth);
- for (unsigned i = 0; i < BitWidth; ++i)
- Result->Provenance[i] = i;
- return Result;
-}
-
-static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
- unsigned BitWidth) {
- if (From % 8 != To % 8)
- return false;
- // Convert from bit indices to byte indices and check for a byte reversal.
- From >>= 3;
- To >>= 3;
- BitWidth >>= 3;
- return From == BitWidth - To - 1;
-}
-
-static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
- unsigned BitWidth) {
- return From == BitWidth - To - 1;
-}
-
-bool llvm::recognizeBSwapOrBitReverseIdiom(
- Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
- SmallVectorImpl<Instruction *> &InsertedInsts) {
- if (Operator::getOpcode(I) != Instruction::Or)
- return false;
- if (!MatchBSwaps && !MatchBitReversals)
- return false;
- IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
- if (!ITy || ITy->getBitWidth() > 128)
- return false; // Can't do vectors or integers > 128 bits.
- unsigned BW = ITy->getBitWidth();
-
- unsigned DemandedBW = BW;
- IntegerType *DemandedTy = ITy;
- if (I->hasOneUse()) {
- if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) {
- DemandedTy = cast<IntegerType>(Trunc->getType());
- DemandedBW = DemandedTy->getBitWidth();
- }
- }
-
- // Try to find all the pieces corresponding to the bswap.
- std::map<Value *, Optional<BitPart>> BPS;
- auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
- if (!Res)
- return false;
- auto &BitProvenance = Res->Provenance;
-
- // Now, is the bit permutation correct for a bswap or a bitreverse? We can
- // only byteswap values with an even number of bytes.
- bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true;
- for (unsigned i = 0; i < DemandedBW; ++i) {
- OKForBSwap &=
- bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW);
- OKForBitReverse &=
- bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW);
- }
-
- Intrinsic::ID Intrin;
- if (OKForBSwap && MatchBSwaps)
- Intrin = Intrinsic::bswap;
- else if (OKForBitReverse && MatchBitReversals)
- Intrin = Intrinsic::bitreverse;
- else
- return false;
-
- if (ITy != DemandedTy) {
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy);
- Value *Provider = Res->Provider;
- IntegerType *ProviderTy = cast<IntegerType>(Provider->getType());
- // We may need to truncate the provider.
- if (DemandedTy != ProviderTy) {
- auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy,
- "trunc", I);
- InsertedInsts.push_back(Trunc);
- Provider = Trunc;
- }
- auto *CI = CallInst::Create(F, Provider, "rev", I);
- InsertedInsts.push_back(CI);
- auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I);
- InsertedInsts.push_back(ExtInst);
- return true;
- }
-
- Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
- InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
- return true;
-}
-
-// CodeGen has special handling for some string functions that may replace
-// them with target-specific intrinsics. Since that'd skip our interceptors
-// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses,
-// we mark affected calls as NoBuiltin, which will disable optimization
-// in CodeGen.
-void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
- CallInst *CI, const TargetLibraryInfo *TLI) {
- Function *F = CI->getCalledFunction();
- LibFunc Func;
- if (F && !F->hasLocalLinkage() && F->hasName() &&
- TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
- !F->doesNotAccessMemory())
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
-}
-
-bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
- // We can't have a PHI with a metadata type.
- if (I->getOperand(OpIdx)->getType()->isMetadataTy())
- return false;
-
- // Early exit.
- if (!isa<Constant>(I->getOperand(OpIdx)))
- return true;
-
- switch (I->getOpcode()) {
- default:
- return true;
- case Instruction::Call:
- case Instruction::Invoke:
- // Can't handle inline asm. Skip it.
- if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue()))
- return false;
- // Many arithmetic intrinsics have no issue taking a
- // variable, however it's hard to distingish these from
- // specials such as @llvm.frameaddress that require a constant.
- if (isa<IntrinsicInst>(I))
- return false;
-
- // Constant bundle operands may need to retain their constant-ness for
- // correctness.
- if (ImmutableCallSite(I).isBundleOperand(OpIdx))
- return false;
- return true;
- case Instruction::ShuffleVector:
- // Shufflevector masks are constant.
- return OpIdx != 2;
- case Instruction::Switch:
- case Instruction::ExtractValue:
- // All operands apart from the first are constant.
- return OpIdx == 0;
- case Instruction::InsertValue:
- // All operands apart from the first and the second are constant.
- return OpIdx < 2;
- case Instruction::Alloca:
- // Static allocas (constant size in the entry block) are handled by
- // prologue/epilogue insertion so they're free anyway. We definitely don't
- // want to make them non-constant.
- return !cast<AllocaInst>(I)->isStaticAlloca();
- case Instruction::GetElementPtr:
- if (OpIdx == 0)
- return true;
- gep_type_iterator It = gep_type_begin(I);
- for (auto E = std::next(It, OpIdx); It != E; ++It)
- if (It.isStruct())
- return false;
- return true;
- }
-}
-
-using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
-AllocaInst *llvm::findAllocaForValue(Value *V,
- AllocaForValueMapTy &AllocaForValue) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI;
- // See if we've already calculated (or started to calculate) alloca for a
- // given value.
- AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
- // Store 0 while we're calculating alloca for value V to avoid
- // infinite recursion if the value references itself.
- AllocaForValue[V] = nullptr;
- AllocaInst *Res = nullptr;
- if (CastInst *CI = dyn_cast<CastInst>(V))
- Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : PN->incoming_values()) {
- // Allow self-referencing phi-nodes.
- if (IncValue == PN)
- continue;
- AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
- // AI for incoming values should exist and should all be equal.
- if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
- return nullptr;
- Res = IncValueAI;
- }
- } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
- Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
- } else {
- LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
- << *V << "\n");
- }
- if (Res)
- AllocaForValue[V] = Res;
- return Res;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
deleted file mode 100644
index 37389a695b45..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ /dev/null
@@ -1,690 +0,0 @@
-//===----------------- LoopRotationUtils.cpp -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides utilities to convert a loop into a loop with bottom test.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopRotationUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-rotate"
-
-STATISTIC(NumRotated, "Number of loops rotated");
-
-namespace {
-/// A simple loop rotation transformation.
-class LoopRotate {
- const unsigned MaxHeaderSize;
- LoopInfo *LI;
- const TargetTransformInfo *TTI;
- AssumptionCache *AC;
- DominatorTree *DT;
- ScalarEvolution *SE;
- MemorySSAUpdater *MSSAU;
- const SimplifyQuery &SQ;
- bool RotationOnly;
- bool IsUtilMode;
-
-public:
- LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
- : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
- MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
- IsUtilMode(IsUtilMode) {}
- bool processLoop(Loop *L);
-
-private:
- bool rotateLoop(Loop *L, bool SimplifiedLatch);
- bool simplifyLoopLatch(Loop *L);
-};
-} // end anonymous namespace
-
-/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
-/// old header into the preheader. If there were uses of the values produced by
-/// these instruction that were outside of the loop, we have to insert PHI nodes
-/// to merge the two values. Do this now.
-static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
- BasicBlock *OrigPreheader,
- ValueToValueMapTy &ValueMap,
- SmallVectorImpl<PHINode*> *InsertedPHIs) {
- // Remove PHI node entries that are no longer live.
- BasicBlock::iterator I, E = OrigHeader->end();
- for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
-
- // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
- // as necessary.
- SSAUpdater SSA(InsertedPHIs);
- for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = &*I;
-
- // If there are no uses of the value (e.g. because it returns void), there
- // is nothing to rewrite.
- if (OrigHeaderVal->use_empty())
- continue;
-
- Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
-
- // The value now exits in two versions: the initial value in the preheader
- // and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
- SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
- SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
-
- // Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end();
- UI != UE;) {
- // Grab the use before incrementing the iterator.
- Use &U = *UI;
-
- // Increment the iterator before removing the use from the list.
- ++UI;
-
- // SSAUpdater can't handle a non-PHI use in the same block as an
- // earlier def. We can easily handle those cases manually.
- Instruction *UserInst = cast<Instruction>(U.getUser());
- if (!isa<PHINode>(UserInst)) {
- BasicBlock *UserBB = UserInst->getParent();
-
- // The original users in the OrigHeader are already using the
- // original definitions.
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped.
- if (UserBB == OrigPreheader) {
- U = OrigPreHeaderVal;
- continue;
- }
- }
-
- // Anything else can be handled by SSAUpdater.
- SSA.RewriteUse(U);
- }
-
- // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
- // intrinsics.
- SmallVector<DbgValueInst *, 1> DbgValues;
- llvm::findDbgValues(DbgValues, OrigHeaderVal);
- for (auto &DbgValue : DbgValues) {
- // The original users in the OrigHeader are already using the original
- // definitions.
- BasicBlock *UserBB = DbgValue->getParent();
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped and anything else can be handled by
- // the SSAUpdater. To avoid adding PHINodes, check if the value is
- // available in UserBB, if not substitute undef.
- Value *NewVal;
- if (UserBB == OrigPreheader)
- NewVal = OrigPreHeaderVal;
- else if (SSA.HasValueForBlock(UserBB))
- NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
- else
- NewVal = UndefValue::get(OrigHeaderVal->getType());
- DbgValue->setOperand(0,
- MetadataAsValue::get(OrigHeaderVal->getContext(),
- ValueAsMetadata::get(NewVal)));
- }
- }
-}
-
-// Look for a phi which is only used outside the loop (via a LCSSA phi)
-// in the exit from the header. This means that rotating the loop can
-// remove the phi.
-static bool shouldRotateLoopExitingLatch(Loop *L) {
- BasicBlock *Header = L->getHeader();
- BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
- if (L->contains(HeaderExit))
- HeaderExit = Header->getTerminator()->getSuccessor(1);
-
- for (auto &Phi : Header->phis()) {
- // Look for uses of this phi in the loop/via exits other than the header.
- if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
- return cast<Instruction>(U)->getParent() != HeaderExit;
- }))
- continue;
- return true;
- }
-
- return false;
-}
-
-/// Rotate loop LP. Return true if the loop is rotated.
-///
-/// \param SimplifiedLatch is true if the latch was just folded into the final
-/// loop exit. In this case we may want to rotate even though the new latch is
-/// now an exiting branch. This rotation would have happened had the latch not
-/// been simplified. However, if SimplifiedLatch is false, then we avoid
-/// rotating loops in which the latch exits to avoid excessive or endless
-/// rotation. LoopRotate should be repeatable and converge to a canonical
-/// form. This property is satisfied because simplifying the loop latch can only
-/// happen once across multiple invocations of the LoopRotate pass.
-bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
- // If the loop has only one block then there is not much to rotate.
- if (L->getBlocks().size() == 1)
- return false;
-
- BasicBlock *OrigHeader = L->getHeader();
- BasicBlock *OrigLatch = L->getLoopLatch();
-
- BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (!BI || BI->isUnconditional())
- return false;
-
- // If the loop header is not one of the loop exiting blocks then
- // either this loop is already rotated or it is not
- // suitable for loop rotation transformations.
- if (!L->isLoopExiting(OrigHeader))
- return false;
-
- // If the loop latch already contains a branch that leaves the loop then the
- // loop is already rotated.
- if (!OrigLatch)
- return false;
-
- // Rotate if either the loop latch does *not* exit the loop, or if the loop
- // latch was just simplified. Or if we think it will be profitable.
- if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
- !shouldRotateLoopExitingLatch(L))
- return false;
-
- // Check size of original header and reject loop if it is very big or we can't
- // duplicate blocks inside it.
- {
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-
- CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
- if (Metrics.notDuplicatable) {
- LLVM_DEBUG(
- dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
- << " instructions: ";
- L->dump());
- return false;
- }
- if (Metrics.convergent) {
- LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
- "instructions: ";
- L->dump());
- return false;
- }
- if (Metrics.NumInsts > MaxHeaderSize)
- return false;
- }
-
- // Now, this loop is suitable for rotation.
- BasicBlock *OrigPreheader = L->getLoopPreheader();
-
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!OrigPreheader || !L->hasDedicatedExits())
- return false;
-
- // Anything ScalarEvolution may know about this loop or the PHI nodes
- // in its header will soon be invalidated. We should also invalidate
- // all outer loops because insertion and deletion of blocks that happens
- // during the rotation may violate invariants related to backedge taken
- // infos in them.
- if (SE)
- SE->forgetTopmostLoop(L);
-
- LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- // Find new Loop header. NewHeader is a Header's one and only successor
- // that is inside loop. Header's other successor is outside the
- // loop. Otherwise loop is not suitable for rotation.
- BasicBlock *Exit = BI->getSuccessor(0);
- BasicBlock *NewHeader = BI->getSuccessor(1);
- if (L->contains(Exit))
- std::swap(Exit, NewHeader);
- assert(NewHeader && "Unable to determine new loop header");
- assert(L->contains(NewHeader) && !L->contains(Exit) &&
- "Unable to determine loop header and exit blocks");
-
- // This code assumes that the new header has exactly one predecessor.
- // Remove any single-entry PHI nodes in it.
- assert(NewHeader->getSinglePredecessor() &&
- "New header doesn't have one pred!");
- FoldSingleEntryPHINodes(NewHeader);
-
- // Begin by walking OrigHeader and populating ValueMap with an entry for
- // each Instruction.
- BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- ValueToValueMapTy ValueMap, ValueMapMSSA;
-
- // For PHI nodes, the value available in OldPreHeader is just the
- // incoming value from OldPreHeader.
- for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
-
- // For the rest of the instructions, either hoist to the OrigPreheader if
- // possible or create a clone in the OldPreHeader if not.
- Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
-
- // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
- using DbgIntrinsicHash =
- std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
- return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
- };
- SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
- for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
- I != E; ++I) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
- DbgIntrinsics.insert(makeHash(DII));
- else
- break;
- }
-
- while (I != E) {
- Instruction *Inst = &*I++;
-
- // If the instruction's operands are invariant and it doesn't read or write
- // memory, then it is safe to hoist. Doing this doesn't change the order of
- // execution in the preheader, but does prevent the instruction from
- // executing in each iteration of the loop. This means it is safe to hoist
- // something that might trap, but isn't safe to hoist something that reads
- // memory (without proving that the loop doesn't write).
- if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
- !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
- !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
- Inst->moveBefore(LoopEntryBranch);
- continue;
- }
-
- // Otherwise, create a duplicate of the instruction.
- Instruction *C = Inst->clone();
-
- // Eagerly remap the operands of the instruction.
- RemapInstruction(C, ValueMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
-
- // Avoid inserting the same intrinsic twice.
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
- if (DbgIntrinsics.count(makeHash(DII))) {
- C->deleteValue();
- continue;
- }
-
- // With the operands remapped, see if the instruction constant folds or is
- // otherwise simplifyable. This commonly occurs because the entry from PHI
- // nodes allows icmps and other instructions to fold.
- Value *V = SimplifyInstruction(C, SQ);
- if (V && LI->replacementPreservesLCSSAForm(C, V)) {
- // If so, then delete the temporary instruction and stick the folded value
- // in the map.
- ValueMap[Inst] = V;
- if (!C->mayHaveSideEffects()) {
- C->deleteValue();
- C = nullptr;
- }
- } else {
- ValueMap[Inst] = C;
- }
- if (C) {
- // Otherwise, stick the new instruction into the new block!
- C->setName(Inst->getName());
- C->insertBefore(LoopEntryBranch);
-
- if (auto *II = dyn_cast<IntrinsicInst>(C))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- // MemorySSA cares whether the cloned instruction was inserted or not, and
- // not whether it can be remapped to a simplified value.
- ValueMapMSSA[Inst] = C;
- }
- }
-
- // Along with all the other instructions, we just cloned OrigHeader's
- // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
- // successors by duplicating their incoming values for OrigHeader.
- for (BasicBlock *SuccBB : successors(OrigHeader))
- for (BasicBlock::iterator BI = SuccBB->begin();
- PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
-
- // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
- // OrigPreHeader's old terminator (the original branch into the loop), and
- // remove the corresponding incoming values from the PHI nodes in OrigHeader.
- LoopEntryBranch->eraseFromParent();
-
- // Update MemorySSA before the rewrite call below changes the 1:1
- // instruction:cloned_instruction_or_value mapping.
- if (MSSAU) {
- ValueMapMSSA[OrigHeader] = OrigPreheader;
- MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
- ValueMapMSSA);
- }
-
- SmallVector<PHINode*, 2> InsertedPHIs;
- // If there were any uses of instructions in the duplicated block outside the
- // loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
- &InsertedPHIs);
-
- // Attach dbg.value intrinsics to the new phis if that phi uses a value that
- // previously had debug metadata attached. This keeps the debug info
- // up-to-date in the loop body.
- if (!InsertedPHIs.empty())
- insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
-
- // NewHeader is now the header of the loop.
- L->moveToHeader(NewHeader);
- assert(L->getHeader() == NewHeader && "Latch block is our new header");
-
- // Inform DT about changes to the CFG.
- if (DT) {
- // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
- // the DT about the removed edge to the OrigHeader (that got removed).
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
- Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
- Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
- DT->applyUpdates(Updates);
-
- if (MSSAU) {
- MSSAU->applyUpdates(Updates, *DT);
- if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
- }
- }
-
- // At this point, we've finished our major CFG changes. As part of cloning
- // the loop into the preheader we've simplified instructions and the
- // duplicated conditional branch may now be branching on a constant. If it is
- // branching on a constant and if that constant means that we enter the loop,
- // then we fold away the cond branch to an uncond branch. This simplifies the
- // loop in cases important for nested loops, and it also means we don't have
- // to split as many edges.
- BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
- assert(PHBI->isConditional() && "Should be clone of BI condbr!");
- if (!isa<ConstantInt>(PHBI->getCondition()) ||
- PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
- NewHeader) {
- // The conditional branch can't be folded, handle the general case.
- // Split edges as necessary to preserve LoopSimplify form.
-
- // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
- // thus is not a preheader anymore.
- // Split the edge to form a real preheader.
- BasicBlock *NewPH = SplitCriticalEdge(
- OrigPreheader, NewHeader,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
- NewPH->setName(NewHeader->getName() + ".lr.ph");
-
- // Preserve canonical loop form, which means that 'Exit' should have only
- // one predecessor. Note that Exit could be an exit block for multiple
- // nested loops, causing both of the edges to now be critical and need to
- // be split.
- SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
- bool SplitLatchEdge = false;
- for (BasicBlock *ExitPred : ExitPreds) {
- // We only need to split loop exit edges.
- Loop *PredLoop = LI->getLoopFor(ExitPred);
- if (!PredLoop || PredLoop->contains(Exit) ||
- ExitPred->getTerminator()->isIndirectTerminator())
- continue;
- SplitLatchEdge |= L->getLoopLatch() == ExitPred;
- BasicBlock *ExitSplit = SplitCriticalEdge(
- ExitPred, Exit,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
- ExitSplit->moveBefore(Exit);
- }
- assert(SplitLatchEdge &&
- "Despite splitting all preds, failed to split latch exit?");
- } else {
- // We can fold the conditional branch in the preheader, this makes things
- // simpler. The first step is to remove the extra edge to the Exit block.
- Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
- BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
- NewBI->setDebugLoc(PHBI->getDebugLoc());
- PHBI->eraseFromParent();
-
- // With our CFG finalized, update DomTree if it is available.
- if (DT) DT->deleteEdge(OrigPreheader, Exit);
-
- // Update MSSA too, if available.
- if (MSSAU)
- MSSAU->removeEdge(OrigPreheader, Exit);
- }
-
- assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
- assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- // Now that the CFG and DomTree are in a consistent state again, try to merge
- // the OrigHeader block into OrigLatch. This will succeed if they are
- // connected by an unconditional branch. This is just a cleanup so the
- // emitted code isn't too gross in this common case.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
-
- ++NumRotated;
- return true;
-}
-
-/// Determine whether the instructions in this range may be safely and cheaply
-/// speculated. This is not an important enough situation to develop complex
-/// heuristics. We handle a single arithmetic instruction along with any type
-/// conversions.
-static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
- BasicBlock::iterator End, Loop *L) {
- bool seenIncrement = false;
- bool MultiExitLoop = false;
-
- if (!L->getExitingBlock())
- MultiExitLoop = true;
-
- for (BasicBlock::iterator I = Begin; I != End; ++I) {
-
- if (!isSafeToSpeculativelyExecute(&*I))
- return false;
-
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
- default:
- return false;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return false;
- // fall-thru to increment case
- LLVM_FALLTHROUGH;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr: {
- Value *IVOpnd =
- !isa<Constant>(I->getOperand(0))
- ? I->getOperand(0)
- : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
- if (!IVOpnd)
- return false;
-
- // If increment operand is used outside of the loop, this speculation
- // could cause extra live range interference.
- if (MultiExitLoop) {
- for (User *UseI : IVOpnd->users()) {
- auto *UserInst = cast<Instruction>(UseI);
- if (!L->contains(UserInst))
- return false;
- }
- }
-
- if (seenIncrement)
- return false;
- seenIncrement = true;
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- // ignore type conversions
- break;
- }
- }
- return true;
-}
-
-/// Fold the loop tail into the loop exit by speculating the loop tail
-/// instructions. Typically, this is a single post-increment. In the case of a
-/// simple 2-block loop, hoisting the increment can be much better than
-/// duplicating the entire loop header. In the case of loops with early exits,
-/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
-/// canonical form so downstream passes can handle it.
-///
-/// I don't believe this invalidates SCEV.
-bool LoopRotate::simplifyLoopLatch(Loop *L) {
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch || Latch->hasAddressTaken())
- return false;
-
- BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!Jmp || !Jmp->isUnconditional())
- return false;
-
- BasicBlock *LastExit = Latch->getSinglePredecessor();
- if (!LastExit || !L->isLoopExiting(LastExit))
- return false;
-
- BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
- if (!BI)
- return false;
-
- if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
- return false;
-
- LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
- << LastExit->getName() << "\n");
-
- // Hoist the instructions from Latch into LastExit.
- Instruction *FirstLatchInst = &*(Latch->begin());
- LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
- Latch->begin(), Jmp->getIterator());
-
- // Update MemorySSA
- if (MSSAU)
- MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- return true;
-}
-
-/// Rotate \c L, and return true if any modification was made.
-bool LoopRotate::processLoop(Loop *L) {
- // Save the loop metadata.
- MDNode *LoopMD = L->getLoopID();
-
- bool SimplifiedLatch = false;
-
- // Simplify the loop latch before attempting to rotate the header
- // upward. Rotation may not be needed if the loop tail can be folded into the
- // loop exit.
- if (!RotationOnly)
- SimplifiedLatch = simplifyLoopLatch(L);
-
- bool MadeChange = rotateLoop(L, SimplifiedLatch);
- assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
- "Loop latch should be exiting after loop-rotate.");
-
- // Restore the loop metadata.
- // NB! We presume LoopRotation DOESN'T ADD its own metadata.
- if ((MadeChange || SimplifiedLatch) && LoopMD)
- L->setLoopID(LoopMD);
-
- return MadeChange || SimplifiedLatch;
-}
-
-
-/// The utility to convert a loop into a loop with bottom test.
-bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
- AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- const SimplifyQuery &SQ, bool RotationOnly = true,
- unsigned Threshold = unsigned(-1),
- bool IsUtilMode = true) {
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
- LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
- IsUtilMode);
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- return LR.processLoop(L);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
deleted file mode 100644
index 7e6da02d5707..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ /dev/null
@@ -1,920 +0,0 @@
-//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs several transformations to transform natural loops into a
-// simpler form, which makes subsequent analyses and transformations simpler and
-// more effective.
-//
-// Loop pre-header insertion guarantees that there is a single, non-critical
-// entry edge from outside of the loop to the loop header. This simplifies a
-// number of analyses and transformations, such as LICM.
-//
-// Loop exit-block insertion guarantees that all exit blocks from the loop
-// (blocks which are outside of the loop that have predecessors inside of the
-// loop) only have predecessors from inside of the loop (and are thus dominated
-// by the loop header). This simplifies transformations such as store-sinking
-// that are built into LICM.
-//
-// This pass also guarantees that loops will have exactly one backedge.
-//
-// Indirectbr instructions introduce several complications. If the loop
-// contains or is entered by an indirectbr instruction, it may not be possible
-// to transform the loop and make these guarantees. Client code should check
-// that these conditions are true before relying on them.
-//
-// Similar complications arise from callbr instructions, particularly in
-// asm-goto where blockaddress expressions are used.
-//
-// Note that the simplifycfg pass will clean up blocks which are split out but
-// end up being unnecessary, so usage of this pass should not pessimize
-// generated code.
-//
-// This pass obviously modifies the CFG, but updates loop information and
-// dominator information.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-simplify"
-
-STATISTIC(NumNested , "Number of nested loops split out");
-
-// If the block isn't already, move the new block to right after some 'outside
-// block' block. This prevents the preheader from being placed inside the loop
-// body, e.g. when the loop hasn't been rotated.
-static void placeSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock *> &SplitPreds,
- Loop *L) {
- // Check to see if NewBB is already well placed.
- Function::iterator BBI = --NewBB->getIterator();
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- if (&*BBI == SplitPreds[i])
- return;
- }
-
- // If it isn't already after an outside block, move it after one. This is
- // always good as it makes the uncond branch from the outside block into a
- // fall-through.
-
- // Figure out *which* outside block to put this after. Prefer an outside
- // block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = nullptr;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i]->getIterator();
- if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
- FoundBB = SplitPreds[i];
- break;
- }
- }
-
- // If our heuristic for a *good* bb to place this after doesn't find
- // anything, just pick something. It's likely better than leaving it within
- // the loop.
- if (!FoundBB)
- FoundBB = SplitPreds[0];
- NewBB->moveAfter(FoundBB);
-}
-
-/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
-/// preheader, this method is called to insert one. This method has two phases:
-/// preheader insertion and analysis updating.
-///
-BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
- LoopInfo *LI, MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- BasicBlock *Header = L->getHeader();
-
- // Compute the set of predecessors of the loop that are not in the loop.
- SmallVector<BasicBlock*, 8> OutsideBlocks;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P)) { // Coming in from outside the loop?
- // If the loop is branched to from an indirect terminator, we won't
- // be able to fully transform the loop, because it prohibits
- // edge splitting.
- if (P->getTerminator()->isIndirectTerminator())
- return nullptr;
-
- // Keep track of it.
- OutsideBlocks.push_back(P);
- }
- }
-
- // Split out the loop pre-header.
- BasicBlock *PreheaderBB;
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, MSSAU, PreserveLCSSA);
- if (!PreheaderBB)
- return nullptr;
-
- LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
- << PreheaderBB->getName() << "\n");
-
- // Make sure that NewBB is put someplace intelligent, which doesn't mess up
- // code layout too horribly.
- placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
-
- return PreheaderBB;
-}
-
-/// Add the specified block, and all of its predecessors, to the specified set,
-/// if it's not already in there. Stop predecessor traversal when we reach
-/// StopBlock.
-static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
- std::set<BasicBlock*> &Blocks) {
- SmallVector<BasicBlock *, 8> Worklist;
- Worklist.push_back(InputBB);
- do {
- BasicBlock *BB = Worklist.pop_back_val();
- if (Blocks.insert(BB).second && BB != StopBlock)
- // If BB is not already processed and it is not a stop block then
- // insert its predecessor in the work list
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- BasicBlock *WBB = *I;
- Worklist.push_back(WBB);
- }
- } while (!Worklist.empty());
-}
-
-/// The first part of loop-nestification is to find a PHI node that tells
-/// us how to partition the loops.
-static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
- AssumptionCache *AC) {
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
- PHINode *PN = cast<PHINode>(I);
- ++I;
- if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
- // This is a degenerate PHI already, don't modify it!
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- continue;
- }
-
- // Scan this PHI node looking for a use of the PHI node by itself.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == PN &&
- L->contains(PN->getIncomingBlock(i)))
- // We found something tasty to remove.
- return PN;
- }
- return nullptr;
-}
-
-/// If this loop has multiple backedges, try to pull one of them out into
-/// a nested loop.
-///
-/// This is important for code that looks like
-/// this:
-///
-/// Loop:
-/// ...
-/// br cond, Loop, Next
-/// ...
-/// br cond2, Loop, Out
-///
-/// To identify this common case, we look at the PHI nodes in the header of the
-/// loop. PHI nodes with unchanging values on one backedge correspond to values
-/// that change in the "outer" loop, but not in the "inner" loop.
-///
-/// If we are able to separate out a loop, return the new outer loop that was
-/// created.
-///
-static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
- DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, bool PreserveLCSSA,
- AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
- // Don't try to separate loops without a preheader.
- if (!Preheader)
- return nullptr;
-
- // The header is not a landing pad; preheader insertion should ensure this.
- BasicBlock *Header = L->getHeader();
- assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
-
- PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
- if (!PN) return nullptr; // No known way to partition.
-
- // Pull out all predecessors that have varying values in the loop. This
- // handles the case when a PHI node has multiple instances of itself as
- // arguments.
- SmallVector<BasicBlock*, 8> OuterLoopPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirect control flow edges.
- if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
- return nullptr;
- OuterLoopPreds.push_back(PN->getIncomingBlock(i));
- }
- }
- LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
-
- // If ScalarEvolution is around and knows anything about values in
- // this loop, tell it to forget them, because we're about to
- // substantially change it.
- if (SE)
- SE->forgetLoop(L);
-
- BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, MSSAU, PreserveLCSSA);
-
- // Make sure that NewBB is put someplace intelligent, which doesn't mess up
- // code layout too horribly.
- placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
-
- // Create the new outer loop.
- Loop *NewOuter = LI->AllocateLoop();
-
- // Change the parent loop to use the outer loop as its child now.
- if (Loop *Parent = L->getParentLoop())
- Parent->replaceChildLoopWith(L, NewOuter);
- else
- LI->changeTopLevelLoop(L, NewOuter);
-
- // L is now a subloop of our outer loop.
- NewOuter->addChildLoop(L);
-
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- NewOuter->addBlockEntry(*I);
-
- // Now reset the header in L, which had been moved by
- // SplitBlockPredecessors for the outer loop.
- L->moveToHeader(Header);
-
- // Determine which blocks should stay in L and which should be moved out to
- // the Outer loop now.
- std::set<BasicBlock*> BlocksInL;
- for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
- BasicBlock *P = *PI;
- if (DT->dominates(Header, P))
- addBlockAndPredsToSet(P, Header, BlocksInL);
- }
-
- // Scan all of the loop children of L, moving them to OuterLoop if they are
- // not part of the inner loop.
- const std::vector<Loop*> &SubLoops = L->getSubLoops();
- for (size_t I = 0; I != SubLoops.size(); )
- if (BlocksInL.count(SubLoops[I]->getHeader()))
- ++I; // Loop remains in L
- else
- NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
-
- SmallVector<BasicBlock *, 8> OuterLoopBlocks;
- OuterLoopBlocks.push_back(NewBB);
- // Now that we know which blocks are in L and which need to be moved to
- // OuterLoop, move any blocks that need it.
- for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
- BasicBlock *BB = L->getBlocks()[i];
- if (!BlocksInL.count(BB)) {
- // Move this block to the parent, updating the exit blocks sets
- L->removeBlockFromLoop(BB);
- if ((*LI)[BB] == L) {
- LI->changeLoopFor(BB, NewOuter);
- OuterLoopBlocks.push_back(BB);
- }
- --i;
- }
- }
-
- // Split edges to exit blocks from the inner loop, if they emerged in the
- // process of separating the outer one.
- formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
-
- if (PreserveLCSSA) {
- // Fix LCSSA form for L. Some values, which previously were only used inside
- // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
- // in corresponding exit blocks.
- // We don't need to form LCSSA recursively, because there cannot be uses
- // inside a newly created loop of defs from inner loops as those would
- // already be a use of an LCSSA phi node.
- formLCSSA(*L, *DT, LI, SE);
-
- assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
- "LCSSA is broken after separating nested loops!");
- }
-
- return NewOuter;
-}
-
-/// This method is called when the specified loop has more than one
-/// backedge in it.
-///
-/// If this occurs, revector all of these backedges to target a new basic block
-/// and have that block branch to the loop header. This ensures that loops
-/// have exactly one backedge.
-static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
- DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU) {
- assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
-
- // Get information about the loop
- BasicBlock *Header = L->getHeader();
- Function *F = Header->getParent();
-
- // Unique backedge insertion currently depends on having a preheader.
- if (!Preheader)
- return nullptr;
-
- // The header is not an EH pad; preheader insertion should ensure this.
- assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
-
- // Figure out which basic blocks contain back-edges to the loop header.
- std::vector<BasicBlock*> BackedgeBlocks;
- for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
- BasicBlock *P = *I;
-
- // Indirect edges cannot be split, so we must fail if we find one.
- if (P->getTerminator()->isIndirectTerminator())
- return nullptr;
-
- if (P != Preheader) BackedgeBlocks.push_back(P);
- }
-
- // Create and insert the new backedge block...
- BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
- Header->getName() + ".backedge", F);
- BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
- BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
-
- LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
- << BEBlock->getName() << "\n");
-
- // Move the new backedge block to right after the last backedge block.
- Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
- F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
-
- // Now that the block has been inserted into the function, create PHI nodes in
- // the backedge block which correspond to any PHI nodes in the header block.
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
- PN->getName()+".be", BETerminator);
-
- // Loop over the PHI node, moving all entries except the one for the
- // preheader over to the new PHI node.
- unsigned PreheaderIdx = ~0U;
- bool HasUniqueIncomingValue = true;
- Value *UniqueValue = nullptr;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *IBB = PN->getIncomingBlock(i);
- Value *IV = PN->getIncomingValue(i);
- if (IBB == Preheader) {
- PreheaderIdx = i;
- } else {
- NewPN->addIncoming(IV, IBB);
- if (HasUniqueIncomingValue) {
- if (!UniqueValue)
- UniqueValue = IV;
- else if (UniqueValue != IV)
- HasUniqueIncomingValue = false;
- }
- }
- }
-
- // Delete all of the incoming values from the old PN except the preheader's
- assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
- if (PreheaderIdx != 0) {
- PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
- PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
- }
- // Nuke all entries except the zero'th.
- for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
- PN->removeIncomingValue(e-i, false);
-
- // Finally, add the newly constructed PHI node as the entry for the BEBlock.
- PN->addIncoming(NewPN, BEBlock);
-
- // As an optimization, if all incoming values in the new PhiNode (which is a
- // subset of the incoming values of the old PHI node) have the same value,
- // eliminate the PHI Node.
- if (HasUniqueIncomingValue) {
- NewPN->replaceAllUsesWith(UniqueValue);
- BEBlock->getInstList().erase(NewPN);
- }
- }
-
- // Now that all of the PHI nodes have been inserted and adjusted, modify the
- // backedge blocks to jump to the BEBlock instead of the header.
- // If one of the backedges has llvm.loop metadata attached, we remove
- // it from the backedge and add it to BEBlock.
- unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
- MDNode *LoopMD = nullptr;
- for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
- Instruction *TI = BackedgeBlocks[i]->getTerminator();
- if (!LoopMD)
- LoopMD = TI->getMetadata(LoopMDKind);
- TI->setMetadata(LoopMDKind, nullptr);
- TI->replaceSuccessorWith(Header, BEBlock);
- }
- BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
-
- //===--- Update all analyses which we must preserve now -----------------===//
-
- // Update Loop Information - we know that this block is now in the current
- // loop and all parent loops.
- L->addBasicBlockToLoop(BEBlock, *LI);
-
- // Update dominator information
- DT->splitBlock(BEBlock);
-
- if (MSSAU)
- MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
- BEBlock);
-
- return BEBlock;
-}
-
-/// Simplify one loop and queue further loops for simplification.
-static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
- DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, AssumptionCache *AC,
- MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
- bool Changed = false;
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
-ReprocessLoop:
-
- // Check to see that no blocks (other than the header) in this loop have
- // predecessors that are not in the loop. This is not valid for natural
- // loops, but can occur if the blocks are unreachable. Since they are
- // unreachable we can just shamelessly delete those CFG edges!
- for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
- BB != E; ++BB) {
- if (*BB == L->getHeader()) continue;
-
- SmallPtrSet<BasicBlock*, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB),
- PE = pred_end(*BB); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P))
- BadPreds.insert(P);
- }
-
- // Delete each unique out-of-loop (and thus dead) predecessor.
- for (BasicBlock *P : BadPreds) {
-
- LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << P->getName() << "\n");
-
- // Zap the dead pred's terminator and replace it with unreachable.
- Instruction *TI = P->getTerminator();
- changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
- /*DTU=*/nullptr, MSSAU);
- Changed = true;
- }
- }
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- // If there are exiting blocks with branches on undef, resolve the undef in
- // the direction which will exit the loop. This will help simplify loop
- // trip count computations.
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (BasicBlock *ExitingBlock : ExitingBlocks)
- if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
- if (BI->isConditional()) {
- if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
-
- LLVM_DEBUG(dbgs()
- << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << ExitingBlock->getName() << "\n");
-
- BI->setCondition(ConstantInt::get(Cond->getType(),
- !L->contains(BI->getSuccessor(0))));
-
- Changed = true;
- }
- }
-
- // Does the loop already have a preheader? If so, don't insert one.
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
- if (Preheader)
- Changed = true;
- }
-
- // Next, check to make sure that all exit nodes of the loop only have
- // predecessors that are inside of the loop. This check guarantees that the
- // loop preheader/header will dominate the exit blocks. If the exit block has
- // predecessors from outside of the loop, split the edge now.
- if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
- Changed = true;
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- // If the header has more than two predecessors at this point (from the
- // preheader and from multiple backedges), we must adjust the loop.
- BasicBlock *LoopLatch = L->getLoopLatch();
- if (!LoopLatch) {
- // If this is really a nested loop, rip it out into a child loop. Don't do
- // this for loops with a giant number of backedges, just factor them into a
- // common backedge instead.
- if (L->getNumBackEdges() < 8) {
- if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
- PreserveLCSSA, AC, MSSAU)) {
- ++NumNested;
- // Enqueue the outer loop as it should be processed next in our
- // depth-first nest walk.
- Worklist.push_back(OuterL);
-
- // This is a big restructuring change, reprocess the whole loop.
- Changed = true;
- // GCC doesn't tail recursion eliminate this.
- // FIXME: It isn't clear we can't rely on LLVM to TRE this.
- goto ReprocessLoop;
- }
- }
-
- // If we either couldn't, or didn't want to, identify nesting of the loops,
- // insert a new block that all backedges target, then make it jump to the
- // loop header.
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
- if (LoopLatch)
- Changed = true;
- }
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-
- // Scan over the PHI nodes in the loop header. Since they now have only two
- // incoming values (the loop is canonicalized), we may have simplified the PHI
- // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
- PHINode *PN;
- for (BasicBlock::iterator I = L->getHeader()->begin();
- (PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
- if (SE) SE->forgetValue(PN);
- if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- }
- }
-
- // If this loop has multiple exits and the exits all go to the same
- // block, attempt to merge the exits. This helps several passes, such
- // as LoopRotation, which do not support loops with multiple exits.
- // SimplifyCFG also does this (and this code uses the same utility
- // function), however this code is loop-aware, where SimplifyCFG is
- // not. That gives it the advantage of being able to hoist
- // loop-invariant instructions out of the way to open up more
- // opportunities, and the disadvantage of having the responsibility
- // to preserve dominator information.
- auto HasUniqueExitBlock = [&]() {
- BasicBlock *UniqueExit = nullptr;
- for (auto *ExitingBB : ExitingBlocks)
- for (auto *SuccBB : successors(ExitingBB)) {
- if (L->contains(SuccBB))
- continue;
-
- if (!UniqueExit)
- UniqueExit = SuccBB;
- else if (UniqueExit != SuccBB)
- return false;
- }
-
- return true;
- };
- if (HasUniqueExitBlock()) {
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
- if (!ExitingBlock->getSinglePredecessor()) continue;
- BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!BI || !BI->isConditional()) continue;
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
- if (!CI || CI->getParent() != ExitingBlock) continue;
-
- // Attempt to hoist out all instructions except for the
- // comparison and the branch.
- bool AllInvariant = true;
- bool AnyInvariant = false;
- for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
- Instruction *Inst = &*I++;
- if (Inst == CI)
- continue;
- if (!L->makeLoopInvariant(
- Inst, AnyInvariant,
- Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
- AllInvariant = false;
- break;
- }
- }
- if (AnyInvariant) {
- Changed = true;
- // The loop disposition of all SCEV expressions that depend on any
- // hoisted values have also changed.
- if (SE)
- SE->forgetLoopDispositions(L);
- }
- if (!AllInvariant) continue;
-
- // The block has now been cleared of all instructions except for
- // a comparison and a conditional branch. SimplifyCFG may be able
- // to fold it now.
- if (!FoldBranchToCommonDest(BI, MSSAU))
- continue;
-
- // Success. The block is now dead, so remove it from the loop,
- // update the dominator tree and delete it.
- LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
- << ExitingBlock->getName() << "\n");
-
- assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
- Changed = true;
- LI->removeBlock(ExitingBlock);
-
- DomTreeNode *Node = DT->getNode(ExitingBlock);
- const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
- Node->getChildren();
- while (!Children.empty()) {
- DomTreeNode *Child = Children.front();
- DT->changeImmediateDominator(Child, Node->getIDom());
- }
- DT->eraseNode(ExitingBlock);
- if (MSSAU) {
- SmallSetVector<BasicBlock *, 8> ExitBlockSet;
- ExitBlockSet.insert(ExitingBlock);
- MSSAU->removeBlocks(ExitBlockSet);
- }
-
- BI->getSuccessor(0)->removePredecessor(
- ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
- BI->getSuccessor(1)->removePredecessor(
- ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
- ExitingBlock->eraseFromParent();
- }
- }
-
- // Changing exit conditions for blocks may affect exit counts of this loop and
- // any of its paretns, so we must invalidate the entire subtree if we've made
- // any changes.
- if (Changed && SE)
- SE->forgetTopmostLoop(L);
-
- if (MSSAU && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
-
- return Changed;
-}
-
-bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, AssumptionCache *AC,
- MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
- bool Changed = false;
-
-#ifndef NDEBUG
- // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
- // form.
- if (PreserveLCSSA) {
- assert(DT && "DT not available.");
- assert(LI && "LI not available.");
- assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
- "Requested to preserve LCSSA, but it's already broken.");
- }
-#endif
-
- // Worklist maintains our depth-first queue of loops in this nest to process.
- SmallVector<Loop *, 4> Worklist;
- Worklist.push_back(L);
-
- // Walk the worklist from front to back, pushing newly found sub loops onto
- // the back. This will let us process loops from back to front in depth-first
- // order. We can use this simple process because loops form a tree.
- for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
- Loop *L2 = Worklist[Idx];
- Worklist.append(L2->begin(), L2->end());
- }
-
- while (!Worklist.empty())
- Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
- AC, MSSAU, PreserveLCSSA);
-
- return Changed;
-}
-
-namespace {
- struct LoopSimplify : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : FunctionPass(ID) {
- initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
-
- // We need loop information to identify the loops...
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
-
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<DependenceAnalysisWrapperPass>();
- AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- AU.addPreserved<BranchProbabilityInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-
- /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
- void verifyAnalysis() const override;
- };
-}
-
-char LoopSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", false, false)
-
-// Publicly exposed interface to pass...
-char &llvm::LoopSimplifyID = LoopSimplify::ID;
-Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-
-/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
-/// it in any convenient order) inserting preheaders...
-///
-bool LoopSimplify::runOnFunction(Function &F) {
- bool Changed = false;
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
- AssumptionCache *AC =
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- MemorySSA *MSSA = nullptr;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAAnalysis) {
- MSSA = &MSSAAnalysis->getMSSA();
- MSSAU = make_unique<MemorySSAUpdater>(MSSA);
- }
- }
-
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- // Simplify each loop nest in the function.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
-
-#ifndef NDEBUG
- if (PreserveLCSSA) {
- bool InLCSSA = all_of(
- *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
- assert(InLCSSA && "LCSSA is broken after loop-simplify.");
- }
-#endif
- return Changed;
-}
-
-PreservedAnalyses LoopSimplifyPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- bool Changed = false;
- LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
- DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
- AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
-
- // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
- // after simplifying the loops. MemorySSA is not preserved either.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |=
- simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
-
- if (!Changed)
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
- PA.preserve<ScalarEvolutionAnalysis>();
- PA.preserve<DependenceAnalysis>();
- // BPI maps conditional terminators to probabilities, LoopSimplify can insert
- // blocks, but it does so only by splitting existing blocks and edges. This
- // results in the interesting property that all new terminators inserted are
- // unconditional branches which do not appear in BPI. All deletions are
- // handled via ValueHandle callbacks w/in BPI.
- PA.preserve<BranchProbabilityAnalysis>();
- return PA;
-}
-
-// FIXME: Restore this code when we re-enable verification in verifyAnalysis
-// below.
-#if 0
-static void verifyLoop(Loop *L) {
- // Verify subloops.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- verifyLoop(*I);
-
- // It used to be possible to just assert L->isLoopSimplifyForm(), however
- // with the introduction of indirectbr, there are now cases where it's
- // not possible to transform a loop as necessary. We can at least check
- // that there is an indirectbr near any time there's trouble.
-
- // Indirectbr can interfere with preheader and unique backedge insertion.
- if (!L->getLoopPreheader() || !L->getLoopLatch()) {
- bool HasIndBrPred = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PE = pred_end(L->getHeader()); PI != PE; ++PI)
- if (isa<IndirectBrInst>((*PI)->getTerminator())) {
- HasIndBrPred = true;
- break;
- }
- assert(HasIndBrPred &&
- "LoopSimplify has no excuse for missing loop header info!");
- (void)HasIndBrPred;
- }
-
- // Indirectbr can interfere with exit block canonicalization.
- if (!L->hasDedicatedExits()) {
- bool HasIndBrExiting = false;
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
- HasIndBrExiting = true;
- break;
- }
- }
-
- assert(HasIndBrExiting &&
- "LoopSimplify has no excuse for missing exit block info!");
- (void)HasIndBrExiting;
- }
-}
-#endif
-
-void LoopSimplify::verifyAnalysis() const {
- // FIXME: This routine is being called mid-way through the loop pass manager
- // as loop passes destroy this analysis. That's actually fine, but we have no
- // way of expressing that here. Once all of the passes that destroy this are
- // hoisted out of the loop pass manager we can add back verification here.
-#if 0
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- verifyLoop(*I);
-#endif
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
deleted file mode 100644
index 4a1edb3700c0..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ /dev/null
@@ -1,978 +0,0 @@
-//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements some loop unrolling utilities. It does not define any
-// actual pass or policy, but provides a single function to perform loop
-// unrolling.
-//
-// The process of unrolling can produce extraneous basic blocks linked with
-// unconditional branches. This will be corrected in the future.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unroll"
-
-// TODO: Should these be here or in LoopUnroll?
-STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
-STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
- "conditional latch (completely or otherwise)");
-
-static cl::opt<bool>
-UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
- cl::desc("Allow runtime unrolled loops to be unrolled "
- "with epilog instead of prolog."));
-
-static cl::opt<bool>
-UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
- cl::desc("Verify domtree after unrolling"),
-#ifdef EXPENSIVE_CHECKS
- cl::init(true)
-#else
- cl::init(false)
-#endif
- );
-
-/// Convert the instruction operands from referencing the current values into
-/// those specified by VMap.
-void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
-
- // Unwrap arguments of dbg.value intrinsics.
- bool Wrapped = false;
- if (auto *V = dyn_cast<MetadataAsValue>(Op))
- if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) {
- Op = Unwrapped->getValue();
- Wrapped = true;
- }
-
- auto wrap = [&](Value *V) {
- auto &C = I->getContext();
- return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V;
- };
-
- ValueToValueMapTy::iterator It = VMap.find(Op);
- if (It != VMap.end())
- I->setOperand(op, wrap(It->second));
- }
-
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
- if (It != VMap.end())
- PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
- }
- }
-}
-
-/// Check if unrolling created a situation where we need to insert phi nodes to
-/// preserve LCSSA form.
-/// \param Blocks is a vector of basic blocks representing unrolled loop.
-/// \param L is the outer loop.
-/// It's possible that some of the blocks are in L, and some are not. In this
-/// case, if there is a use is outside L, and definition is inside L, we need to
-/// insert a phi-node, otherwise LCSSA will be broken.
-/// The function is just a helper function for llvm::UnrollLoop that returns
-/// true if this situation occurs, indicating that LCSSA needs to be fixed.
-static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
- LoopInfo *LI) {
- for (BasicBlock *BB : Blocks) {
- if (LI->getLoopFor(BB) == L)
- continue;
- for (Instruction &I : *BB) {
- for (Use &U : I.operands()) {
- if (auto Def = dyn_cast<Instruction>(U)) {
- Loop *DefLoop = LI->getLoopFor(Def->getParent());
- if (!DefLoop)
- continue;
- if (DefLoop->contains(L))
- return true;
- }
- }
- }
- }
- return false;
-}
-
-/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
-/// and adds a mapping from the original loop to the new loop to NewLoops.
-/// Returns nullptr if no new loop was created and a pointer to the
-/// original loop OriginalBB was part of otherwise.
-const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
- BasicBlock *ClonedBB, LoopInfo *LI,
- NewLoopsMap &NewLoops) {
- // Figure out which loop New is in.
- const Loop *OldLoop = LI->getLoopFor(OriginalBB);
- assert(OldLoop && "Should (at least) be in the loop being unrolled!");
-
- Loop *&NewLoop = NewLoops[OldLoop];
- if (!NewLoop) {
- // Found a new sub-loop.
- assert(OriginalBB == OldLoop->getHeader() &&
- "Header should be first in RPO");
-
- NewLoop = LI->AllocateLoop();
- Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
-
- if (NewLoopParent)
- NewLoopParent->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
-
- NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
- return OldLoop;
- } else {
- NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
- return nullptr;
- }
-}
-
-/// The function chooses which type of unroll (epilog or prolog) is more
-/// profitabale.
-/// Epilog unroll is more profitable when there is PHI that starts from
-/// constant. In this case epilog will leave PHI start from constant,
-/// but prolog will convert it to non-constant.
-///
-/// loop:
-/// PN = PHI [I, Latch], [CI, PreHeader]
-/// I = foo(PN)
-/// ...
-///
-/// Epilog unroll case.
-/// loop:
-/// PN = PHI [I2, Latch], [CI, PreHeader]
-/// I1 = foo(PN)
-/// I2 = foo(I1)
-/// ...
-/// Prolog unroll case.
-/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
-/// loop:
-/// PN = PHI [I2, Latch], [NewPN, PreHeader]
-/// I1 = foo(PN)
-/// I2 = foo(I1)
-/// ...
-///
-static bool isEpilogProfitable(Loop *L) {
- BasicBlock *PreHeader = L->getLoopPreheader();
- BasicBlock *Header = L->getHeader();
- assert(PreHeader && Header);
- for (const PHINode &PN : Header->phis()) {
- if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
- return true;
- }
- return false;
-}
-
-/// Perform some cleanup and simplifications on loops after unrolling. It is
-/// useful to simplify the IV's in the new loop, as well as do a quick
-/// simplify/dce pass of the instructions.
-void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC) {
- // Simplify any new induction variables in the partially unrolled loop.
- if (SE && SimplifyIVs) {
- SmallVector<WeakTrackingVH, 16> DeadInsts;
- simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
-
- // Aggressively clean up dead instructions that simplifyLoopIVs already
- // identified. Any remaining should be cleaned up below.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
-
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- for (BasicBlock *BB : L->getBlocks()) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = &*I++;
-
- if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
- if (LI->replacementPreservesLCSSAForm(Inst, V))
- Inst->replaceAllUsesWith(V);
- if (isInstructionTriviallyDead(Inst))
- BB->getInstList().erase(Inst);
- }
- }
-
- // TODO: after peeling or unrolling, previously loop variant conditions are
- // likely to fold to constants, eagerly propagating those here will require
- // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
- // appropriate.
-}
-
-/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
-/// can only fail when the loop's latch block is not terminated by a conditional
-/// branch instruction. However, if the trip count (and multiple) are not known,
-/// loop unrolling will mostly produce more code that is no faster.
-///
-/// TripCount is the upper bound of the iteration on which control exits
-/// LatchBlock. Control may exit the loop prior to TripCount iterations either
-/// via an early branch in other loop block or via LatchBlock terminator. This
-/// is relaxed from the general definition of trip count which is the number of
-/// times the loop header executes. Note that UnrollLoop assumes that the loop
-/// counter test is in LatchBlock in order to remove unnecesssary instances of
-/// the test. If control can exit the loop from the LatchBlock's terminator
-/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
-///
-/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
-/// needs to be preserved. It is needed when we use trip count upper bound to
-/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
-/// conditional branch needs to be preserved.
-///
-/// Similarly, TripMultiple divides the number of times that the LatchBlock may
-/// execute without exiting the loop.
-///
-/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
-/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
-/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
-/// iterations before branching into the unrolled loop. UnrollLoop will not
-/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
-/// AllowExpensiveTripCount is false.
-///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
-/// number of iterations we want to peel off.
-///
-/// The LoopInfo Analysis that is passed will be kept consistent.
-///
-/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
-/// DominatorTree if they are non-null.
-///
-/// If RemainderLoop is non-null, it will receive the remainder loop (if
-/// required and not fully unrolled).
-LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE,
- bool PreserveLCSSA, Loop **RemainderLoop) {
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
- LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock) {
- LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- // Loops with indirectbr cannot be cloned.
- if (!L->isSafeToClone()) {
- LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- // The current loop unroll pass can unroll loops with a single latch or header
- // that's a conditional branch exiting the loop.
- // FIXME: The implementation can be extended to work with more complicated
- // cases, e.g. loops with multiple latches.
- BasicBlock *Header = L->getHeader();
- BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
- BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
- // FIXME: Support loops without conditional latch and multiple exiting blocks.
- if (!BI ||
- (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
- L->getExitingBlock() != Header))) {
- LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
- "branch in the latch or header.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
- return BI->isConditional() && BI->getSuccessor(S1) == Header &&
- !L->contains(BI->getSuccessor(S2));
- };
-
- // If we have a conditional latch, it must exit the loop.
- if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
- !CheckLatchSuccessors(1, 0)) {
- LLVM_DEBUG(
- dbgs() << "Can't unroll; a conditional latch must exit the loop");
- return LoopUnrollResult::Unmodified;
- }
-
- auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
- return HeaderBI && HeaderBI->isConditional() &&
- L->contains(HeaderBI->getSuccessor(S1)) &&
- !L->contains(HeaderBI->getSuccessor(S2));
- };
-
- // If we do not have a conditional latch, the header must exit the loop.
- if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
- !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
- LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
- return LoopUnrollResult::Unmodified;
- }
-
- if (Header->hasAddressTaken()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
- LLVM_DEBUG(
- dbgs() << " Won't unroll loop: address of header block is taken.\n");
- return LoopUnrollResult::Unmodified;
- }
-
- if (ULO.TripCount != 0)
- LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
- if (ULO.TripMultiple != 1)
- LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
-
- // Effectively "DCE" unrolled iterations that are beyond the tripcount
- // and will never be executed.
- if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
- ULO.Count = ULO.TripCount;
-
- // Don't enter the unroll code if there is nothing to do.
- if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
- return LoopUnrollResult::Unmodified;
- }
-
- assert(ULO.Count > 0);
- assert(ULO.TripMultiple > 0);
- assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
-
- // Are we eliminating the loop control altogether?
- bool CompletelyUnroll = ULO.Count == ULO.TripCount;
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
-
- // Go through all exits of L and see if there are any phi-nodes there. We just
- // conservatively assume that they're inserted to preserve LCSSA form, which
- // means that complete unrolling might break this form. We need to either fix
- // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
- // now we just recompute LCSSA for the outer loop, but it should be possible
- // to fix it in-place.
- bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
- any_of(ExitBlocks, [](const BasicBlock *BB) {
- return isa<PHINode>(BB->begin());
- });
-
- // We assume a run-time trip count if the compiler cannot
- // figure out the loop trip count and the unroll-runtime
- // flag is specified.
- bool RuntimeTripCount =
- (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
-
- assert((!RuntimeTripCount || !ULO.PeelCount) &&
- "Did not expect runtime trip-count unrolling "
- "and peeling for the same loop");
-
- bool Peeled = false;
- if (ULO.PeelCount) {
- Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
-
- // Successful peeling may result in a change in the loop preheader/trip
- // counts. If we later unroll the loop, we want these to be updated.
- if (Peeled) {
- // According to our guards and profitability checks the only
- // meaningful exit should be latch block. Other exits go to deopt,
- // so we do not worry about them.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- assert(ExitingBlock && "Loop without exiting block?");
- assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
- Preheader = L->getLoopPreheader();
- ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
- }
- }
-
- // Loops containing convergent instructions must have a count that divides
- // their TripMultiple.
- LLVM_DEBUG(
- {
- bool HasConvergent = false;
- for (auto &BB : L->blocks())
- for (auto &I : *BB)
- if (auto CS = CallSite(&I))
- HasConvergent |= CS.isConvergent();
- assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
- "Unroll count must divide trip multiple if loop contains a "
- "convergent operation.");
- });
-
- bool EpilogProfitability =
- UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
- : isEpilogProfitable(L);
-
- if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
- !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
- EpilogProfitability, ULO.UnrollRemainder,
- ULO.ForgetAllSCEV, LI, SE, DT, AC,
- PreserveLCSSA, RemainderLoop)) {
- if (ULO.Force)
- RuntimeTripCount = false;
- else {
- LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
- "generated when assuming runtime trip count\n");
- return LoopUnrollResult::Unmodified;
- }
- }
-
- // If we know the trip count, we know the multiple...
- unsigned BreakoutTrip = 0;
- if (ULO.TripCount != 0) {
- BreakoutTrip = ULO.TripCount % ULO.Count;
- ULO.TripMultiple = 0;
- } else {
- // Figure out what multiple to use.
- BreakoutTrip = ULO.TripMultiple =
- (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
- }
-
- using namespace ore;
- // Report the unrolling decision.
- if (CompletelyUnroll) {
- LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << ULO.TripCount << "!\n");
- if (ORE)
- ORE->emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
- L->getHeader())
- << "completely unrolled loop with "
- << NV("UnrollCount", ULO.TripCount) << " iterations";
- });
- } else if (ULO.PeelCount) {
- LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << ULO.PeelCount << "!\n");
- if (ORE)
- ORE->emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
- L->getHeader())
- << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
- << " iterations";
- });
- } else {
- auto DiagBuilder = [&]() {
- OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
- L->getHeader());
- return Diag << "unrolled loop by a factor of "
- << NV("UnrollCount", ULO.Count);
- };
-
- LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
- << ULO.Count);
- if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
- LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder() << " with a breakout at trip "
- << NV("BreakoutTrip", BreakoutTrip);
- });
- } else if (ULO.TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder()
- << " with " << NV("TripMultiple", ULO.TripMultiple)
- << " trips per branch";
- });
- } else if (RuntimeTripCount) {
- LLVM_DEBUG(dbgs() << " with run-time trip count");
- if (ORE)
- ORE->emit(
- [&]() { return DiagBuilder() << " with run-time trip count"; });
- }
- LLVM_DEBUG(dbgs() << "!\n");
- }
-
- // We are going to make changes to this loop. SCEV may be keeping cached info
- // about it, in particular about backedge taken count. The changes we make
- // are guaranteed to invalidate this information for our loop. It is tempting
- // to only invalidate the loop being unrolled, but it is incorrect as long as
- // all exiting branches from all inner loops have impact on the outer loops,
- // and if something changes inside them then any of outer loops may also
- // change. When we forget outermost loop, we also forget all contained loops
- // and this is what we need here.
- if (SE) {
- if (ULO.ForgetAllSCEV)
- SE->forgetAllLoops();
- else
- SE->forgetTopmostLoop(L);
- }
-
- bool ContinueOnTrue;
- bool LatchIsExiting = BI->isConditional();
- BasicBlock *LoopExit = nullptr;
- if (LatchIsExiting) {
- ContinueOnTrue = L->contains(BI->getSuccessor(0));
- LoopExit = BI->getSuccessor(ContinueOnTrue);
- } else {
- NumUnrolledWithHeader++;
- ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
- LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
- }
-
- // For the first iteration of the loop, we should use the precloned values for
- // PHI nodes. Insert associations now.
- ValueToValueMapTy LastValueMap;
- std::vector<PHINode*> OrigPHINode;
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- OrigPHINode.push_back(cast<PHINode>(I));
- }
-
- std::vector<BasicBlock *> Headers;
- std::vector<BasicBlock *> HeaderSucc;
- std::vector<BasicBlock *> Latches;
- Headers.push_back(Header);
- Latches.push_back(LatchBlock);
-
- if (!LatchIsExiting) {
- auto *Term = cast<BranchInst>(Header->getTerminator());
- if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
- assert(L->contains(Term->getSuccessor(0)));
- HeaderSucc.push_back(Term->getSuccessor(0));
- } else {
- assert(L->contains(Term->getSuccessor(1)));
- HeaderSucc.push_back(Term->getSuccessor(1));
- }
- }
-
- // The current on-the-fly SSA update requires blocks to be processed in
- // reverse postorder so that LastValueMap contains the correct value at each
- // exit.
- LoopBlocksDFS DFS(L);
- DFS.perform(LI);
-
- // Stash the DFS iterators before adding blocks to the loop.
- LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
- LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
-
- std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
-
- // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
- // might break loop-simplified form for these loops (as they, e.g., would
- // share the same exit blocks). We'll keep track of loops for which we can
- // break this so that later we can re-simplify them.
- SmallSetVector<Loop *, 4> LoopsToSimplify;
- for (Loop *SubLoop : *L)
- LoopsToSimplify.insert(SubLoop);
-
- if (Header->getParent()->isDebugInfoForProfiling())
- for (BasicBlock *BB : L->getBlocks())
- for (Instruction &I : *BB)
- if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
- if (NewDIL)
- I.setDebugLoc(NewDIL.getValue());
- else
- LLVM_DEBUG(dbgs()
- << "Failed to create new discriminator: "
- << DIL->getFilename() << " Line: " << DIL->getLine());
- }
-
- for (unsigned It = 1; It != ULO.Count; ++It) {
- std::vector<BasicBlock*> NewBlocks;
- SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
- NewLoops[L] = L;
-
- for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- ValueToValueMapTy VMap;
- BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
- Header->getParent()->getBasicBlockList().push_back(New);
-
- assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
- "Header should not be in a sub-loop");
- // Tell LI about New.
- const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
- if (OldLoop)
- LoopsToSimplify.insert(NewLoops[OldLoop]);
-
- if (*BB == Header)
- // Loop over all of the PHI nodes in the block, changing them to use
- // the incoming values from the previous block.
- for (PHINode *OrigPHI : OrigPHINode) {
- PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
- Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
- if (Instruction *InValI = dyn_cast<Instruction>(InVal))
- if (It > 1 && L->contains(InValI))
- InVal = LastValueMap[InValI];
- VMap[OrigPHI] = InVal;
- New->getInstList().erase(NewPHI);
- }
-
- // Update our running map of newest clones
- LastValueMap[*BB] = New;
- for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
- VI != VE; ++VI)
- LastValueMap[VI->first] = VI->second;
-
- // Add phi entries for newly created values to all exit blocks.
- for (BasicBlock *Succ : successors(*BB)) {
- if (L->contains(Succ))
- continue;
- for (PHINode &PHI : Succ->phis()) {
- Value *Incoming = PHI.getIncomingValueForBlock(*BB);
- ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
- if (It != LastValueMap.end())
- Incoming = It->second;
- PHI.addIncoming(Incoming, New);
- }
- }
- // Keep track of new headers and latches as we create them, so that
- // we can insert the proper branches later.
- if (*BB == Header)
- Headers.push_back(New);
- if (*BB == LatchBlock)
- Latches.push_back(New);
-
- // Keep track of the successor of the new header in the current iteration.
- for (auto *Pred : predecessors(*BB))
- if (Pred == Header) {
- HeaderSucc.push_back(New);
- break;
- }
-
- NewBlocks.push_back(New);
- UnrolledLoopBlocks.push_back(New);
-
- // Update DomTree: since we just copy the loop body, and each copy has a
- // dedicated entry block (copy of the header block), this header's copy
- // dominates all copied blocks. That means, dominance relations in the
- // copied body are the same as in the original body.
- if (DT) {
- if (*BB == Header)
- DT->addNewBlock(New, Latches[It - 1]);
- else {
- auto BBDomNode = DT->getNode(*BB);
- auto BBIDom = BBDomNode->getIDom();
- BasicBlock *OriginalBBIDom = BBIDom->getBlock();
- DT->addNewBlock(
- New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
- }
- }
- }
-
- // Remap all instructions in the most recent iteration
- for (BasicBlock *NewBlock : NewBlocks) {
- for (Instruction &I : *NewBlock) {
- ::remapInstruction(&I, LastValueMap);
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- }
- }
- }
-
- // Loop over the PHI nodes in the original block, setting incoming values.
- for (PHINode *PN : OrigPHINode) {
- if (CompletelyUnroll) {
- PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
- Header->getInstList().erase(PN);
- } else if (ULO.Count > 1) {
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI))
- InVal = LastValueMap[InVal];
- }
- assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
- PN->addIncoming(InVal, Latches.back());
- }
- }
-
- auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
- ArrayRef<BasicBlock *> NextBlocks,
- BasicBlock *BlockInLoop,
- bool NeedConditional) {
- auto *Term = cast<BranchInst>(Src->getTerminator());
- if (NeedConditional) {
- // Update the conditional branch's successor for the following
- // iteration.
- Term->setSuccessor(!ContinueOnTrue, Dest);
- } else {
- // Remove phi operands at this loop exit
- if (Dest != LoopExit) {
- BasicBlock *BB = Src;
- for (BasicBlock *Succ : successors(BB)) {
- // Preserve the incoming value from BB if we are jumping to the block
- // in the current loop.
- if (Succ == BlockInLoop)
- continue;
- for (PHINode &Phi : Succ->phis())
- Phi.removeIncomingValue(BB, false);
- }
- }
- // Replace the conditional branch with an unconditional one.
- BranchInst::Create(Dest, Term);
- Term->eraseFromParent();
- }
- };
-
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- if (LatchIsExiting) {
- // Set up latches to branch to the new header in the unrolled iterations or
- // the loop exit for the last latch in a fully unrolled loop.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0) {
- NeedConditional = false;
- }
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to keep
- // the conditional branch except the last one because the loop may exit
- // after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional =
- (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
-
- setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
- }
- } else {
- // Setup headers to branch to their new successors in the unrolled
- // iterations.
- for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = HeaderSucc[i];
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0)
- NeedConditional = false;
-
- if (CompletelyUnroll)
- // We cannot drop the conditional branch for the last condition, as we
- // may have to execute the loop body depending on the condition.
- NeedConditional = j == 0 || ULO.PreserveCondBr;
- else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
-
- setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional);
- }
-
- // Set up latches to branch to the new header in the unrolled iterations or
- // the loop exit for the last latch in a fully unrolled loop.
-
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
-
- // When completely unrolling, the last latch becomes unreachable.
- if (CompletelyUnroll && j == 0)
- new UnreachableInst(Term->getContext(), Term);
- else
- // Replace the conditional branch with an unconditional one.
- BranchInst::Create(Dest, Term);
-
- Term->eraseFromParent();
- }
- }
-
- // Update dominators of blocks we might reach through exits.
- // Immediate dominator of such block might change, because we add more
- // routes which can lead to the exit: we can now reach it from the copied
- // iterations too.
- if (DT && ULO.Count > 1) {
- for (auto *BB : OriginalLoopBlocks) {
- auto *BBDomNode = DT->getNode(BB);
- SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- for (auto *ChildDomNode : BBDomNode->getChildren()) {
- auto *ChildBB = ChildDomNode->getBlock();
- if (!L->contains(ChildBB))
- ChildrenToUpdate.push_back(ChildBB);
- }
- BasicBlock *NewIDom;
- BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
- auto &TermBlocks = LatchIsExiting ? Latches : Headers;
- if (BB == TermBlock) {
- // The latch is special because we emit unconditional branches in
- // some cases where the original loop contained a conditional branch.
- // Since the latch is always at the bottom of the loop, if the latch
- // dominated an exit before unrolling, the new dominator of that exit
- // must also be a latch. Specifically, the dominator is the first
- // latch which ends in a conditional branch, or the last latch if
- // there is no such latch.
- // For loops exiting from the header, we limit the supported loops
- // to have a single exiting block.
- NewIDom = TermBlocks.back();
- for (BasicBlock *Iter : TermBlocks) {
- Instruction *Term = Iter->getTerminator();
- if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom = Iter;
- break;
- }
- }
- } else {
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
- }
- for (auto *ChildBB : ChildrenToUpdate)
- DT->changeImmediateDominator(ChildBB, NewIDom);
- }
- }
-
- assert(!DT || !UnrollVerifyDomtree ||
- DT->verify(DominatorTree::VerificationLevel::Fast));
-
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- // Merge adjacent basic blocks, if possible.
- for (BasicBlock *Latch : Latches) {
- BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
- assert((Term ||
- (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
- "Need a branch as terminator, except when fully unrolling with "
- "unconditional latch");
- if (Term && Term->isUnconditional()) {
- BasicBlock *Dest = Term->getSuccessor(0);
- BasicBlock *Fold = Dest->getUniquePredecessor();
- if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
- // Dest has been folded into Fold. Update our worklists accordingly.
- std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
- UnrolledLoopBlocks.end(), Dest),
- UnrolledLoopBlocks.end());
- }
- }
- }
-
- // At this point, the code is well formed. We now simplify the unrolled loop,
- // doing constant propagation and dead code elimination as we go.
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
- SE, DT, AC);
-
- NumCompletelyUnrolled += CompletelyUnroll;
- ++NumUnrolled;
-
- Loop *OuterL = L->getParentLoop();
- // Update LoopInfo if the loop is completely removed.
- if (CompletelyUnroll)
- LI->erase(L);
-
- // After complete unrolling most of the blocks should be contained in OuterL.
- // However, some of them might happen to be out of OuterL (e.g. if they
- // precede a loop exit). In this case we might need to insert PHI nodes in
- // order to preserve LCSSA form.
- // We don't need to check this if we already know that we need to fix LCSSA
- // form.
- // TODO: For now we just recompute LCSSA for the outer loop in this case, but
- // it should be possible to fix it in-place.
- if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
- NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
-
- // If we have a pass and a DominatorTree we should re-simplify impacted loops
- // to ensure subsequent analyses can rely on this form. We want to simplify
- // at least one layer outside of the loop that was unrolled so that any
- // changes to the parent loop exposed by the unrolling are considered.
- if (DT) {
- if (OuterL) {
- // OuterL includes all loops for which we can break loop-simplify, so
- // it's sufficient to simplify only it (it'll recursively simplify inner
- // loops too).
- if (NeedToFixLCSSA) {
- // LCSSA must be performed on the outermost affected loop. The unrolled
- // loop's last loop latch is guaranteed to be in the outermost loop
- // after LoopInfo's been updated by LoopInfo::erase.
- Loop *LatchLoop = LI->getLoopFor(Latches.back());
- Loop *FixLCSSALoop = OuterL;
- if (!FixLCSSALoop->contains(LatchLoop))
- while (FixLCSSALoop->getParentLoop() != LatchLoop)
- FixLCSSALoop = FixLCSSALoop->getParentLoop();
-
- formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
- } else if (PreserveLCSSA) {
- assert(OuterL->isLCSSAForm(*DT) &&
- "Loops should be in LCSSA form after loop-unroll.");
- }
-
- // TODO: That potentially might be compile-time expensive. We should try
- // to fix the loop-simplified form incrementally.
- simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
- } else {
- // Simplify loops for which we might've broken loop-simplify form.
- for (Loop *SubLoop : LoopsToSimplify)
- simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
- }
- }
-
- return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
- : LoopUnrollResult::PartiallyUnrolled;
-}
-
-/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
-/// node with the given name (for example, "llvm.loop.unroll.count"). If no
-/// such metadata node exists, then nullptr is returned.
-MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
-
- MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
- continue;
-
- if (Name.equals(S->getString()))
- return MD;
- }
- return nullptr;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
deleted file mode 100644
index ff49d83f25c5..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ /dev/null
@@ -1,820 +0,0 @@
-//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements loop unroll and jam as a routine, much like
-// LoopUnroll.cpp implements loop unroll.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Analysis/Utils/Local.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unroll-and-jam"
-
-STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
-STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
-
-typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
-
-// Partition blocks in an outer/inner loop pair into blocks before and after
-// the loop
-static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
- BasicBlockSet &ForeBlocks,
- BasicBlockSet &SubLoopBlocks,
- BasicBlockSet &AftBlocks,
- DominatorTree *DT) {
- BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
- SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
-
- for (BasicBlock *BB : L->blocks()) {
- if (!SubLoop->contains(BB)) {
- if (DT->dominates(SubLoopLatch, BB))
- AftBlocks.insert(BB);
- else
- ForeBlocks.insert(BB);
- }
- }
-
- // Check that all blocks in ForeBlocks together dominate the subloop
- // TODO: This might ideally be done better with a dominator/postdominators.
- BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
- for (BasicBlock *BB : ForeBlocks) {
- if (BB == SubLoopPreHeader)
- continue;
- Instruction *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (!ForeBlocks.count(TI->getSuccessor(i)))
- return false;
- }
-
- return true;
-}
-
-// Looks at the phi nodes in Header for values coming from Latch. For these
-// instructions and all their operands calls Visit on them, keeping going for
-// all the operands in AftBlocks. Returns false if Visit returns false,
-// otherwise returns true. This is used to process the instructions in the
-// Aft blocks that need to be moved before the subloop. It is used in two
-// places. One to check that the required set of instructions can be moved
-// before the loop. Then to collect the instructions to actually move in
-// moveHeaderPhiOperandsToForeBlocks.
-template <typename T>
-static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
- BasicBlockSet &AftBlocks, T Visit) {
- SmallVector<Instruction *, 8> Worklist;
- for (auto &Phi : Header->phis()) {
- Value *V = Phi.getIncomingValueForBlock(Latch);
- if (Instruction *I = dyn_cast<Instruction>(V))
- Worklist.push_back(I);
- }
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
- if (!Visit(I))
- return false;
-
- if (AftBlocks.count(I->getParent()))
- for (auto &U : I->operands())
- if (Instruction *II = dyn_cast<Instruction>(U))
- Worklist.push_back(II);
- }
-
- return true;
-}
-
-// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
-static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
- BasicBlock *Latch,
- Instruction *InsertLoc,
- BasicBlockSet &AftBlocks) {
- // We need to ensure we move the instructions in the correct order,
- // starting with the earliest required instruction and moving forward.
- std::vector<Instruction *> Visited;
- processHeaderPhiOperands(Header, Latch, AftBlocks,
- [&Visited, &AftBlocks](Instruction *I) {
- if (AftBlocks.count(I->getParent()))
- Visited.push_back(I);
- return true;
- });
-
- // Move all instructions in program order to before the InsertLoc
- BasicBlock *InsertLocBB = InsertLoc->getParent();
- for (Instruction *I : reverse(Visited)) {
- if (I->getParent() != InsertLocBB)
- I->moveBefore(InsertLoc);
- }
-}
-
-/*
- This method performs Unroll and Jam. For a simple loop like:
- for (i = ..)
- Fore(i)
- for (j = ..)
- SubLoop(i, j)
- Aft(i)
-
- Instead of doing normal inner or outer unrolling, we do:
- for (i = .., i+=2)
- Fore(i)
- Fore(i+1)
- for (j = ..)
- SubLoop(i, j)
- SubLoop(i+1, j)
- Aft(i)
- Aft(i+1)
-
- So the outer loop is essetially unrolled and then the inner loops are fused
- ("jammed") together into a single loop. This can increase speed when there
- are loads in SubLoop that are invariant to i, as they become shared between
- the now jammed inner loops.
-
- We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
- Fore blocks are those before the inner loop, Aft are those after. Normal
- Unroll code is used to copy each of these sets of blocks and the results are
- combined together into the final form above.
-
- isSafeToUnrollAndJam should be used prior to calling this to make sure the
- unrolling will be valid. Checking profitablility is also advisable.
-
- If EpilogueLoop is non-null, it receives the epilogue loop (if it was
- necessary to create one and not fully unrolled).
-*/
-LoopUnrollResult llvm::UnrollAndJamLoop(
- Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
- bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
-
- // When we enter here we should have already checked that it is safe
- BasicBlock *Header = L->getHeader();
- assert(L->getSubLoops().size() == 1);
- Loop *SubLoop = *L->begin();
-
- // Don't enter the unroll code if there is nothing to do.
- if (TripCount == 0 && Count < 2) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
- return LoopUnrollResult::Unmodified;
- }
-
- assert(Count > 0);
- assert(TripMultiple > 0);
- assert(TripCount == 0 || TripCount % TripMultiple == 0);
-
- // Are we eliminating the loop control altogether?
- bool CompletelyUnroll = (Count == TripCount);
-
- // We use the runtime remainder in cases where we don't know trip multiple
- if (TripMultiple == 1 || TripMultiple % Count != 0) {
- if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
- /*UseEpilogRemainder*/ true,
- UnrollRemainder, /*ForgetAllSCEV*/ false,
- LI, SE, DT, AC, true, EpilogueLoop)) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
- "generated when assuming runtime trip count\n");
- return LoopUnrollResult::Unmodified;
- }
- }
-
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- if (SE) {
- SE->forgetLoop(L);
- SE->forgetLoop(SubLoop);
- }
-
- using namespace ore;
- // Report the unrolling decision.
- if (CompletelyUnroll) {
- LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
- << Header->getName() << " with trip count " << TripCount
- << "!\n");
- ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
- L->getHeader())
- << "completely unroll and jammed loop with "
- << NV("UnrollCount", TripCount) << " iterations");
- } else {
- auto DiagBuilder = [&]() {
- OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
- L->getHeader());
- return Diag << "unroll and jammed loop by a factor of "
- << NV("UnrollCount", Count);
- };
-
- LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
- << " by " << Count);
- if (TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
- ORE->emit([&]() {
- return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
- << " trips per branch";
- });
- } else {
- LLVM_DEBUG(dbgs() << " with run-time trip count");
- ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
- }
- LLVM_DEBUG(dbgs() << "!\n");
- }
-
- BasicBlock *Preheader = L->getLoopPreheader();
- BasicBlock *LatchBlock = L->getLoopLatch();
- BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- assert(Preheader && LatchBlock && Header);
- assert(BI && !BI->isUnconditional());
- bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
- BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
- bool SubLoopContinueOnTrue = SubLoop->contains(
- SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
-
- // Partition blocks in an outer/inner loop pair into blocks before and after
- // the loop
- BasicBlockSet SubLoopBlocks;
- BasicBlockSet ForeBlocks;
- BasicBlockSet AftBlocks;
- partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
- DT);
-
- // We keep track of the entering/first and exiting/last block of each of
- // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
- // blocks easier.
- std::vector<BasicBlock *> ForeBlocksFirst;
- std::vector<BasicBlock *> ForeBlocksLast;
- std::vector<BasicBlock *> SubLoopBlocksFirst;
- std::vector<BasicBlock *> SubLoopBlocksLast;
- std::vector<BasicBlock *> AftBlocksFirst;
- std::vector<BasicBlock *> AftBlocksLast;
- ForeBlocksFirst.push_back(Header);
- ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
- SubLoopBlocksFirst.push_back(SubLoop->getHeader());
- SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
- AftBlocksFirst.push_back(SubLoop->getExitBlock());
- AftBlocksLast.push_back(L->getExitingBlock());
- // Maps Blocks[0] -> Blocks[It]
- ValueToValueMapTy LastValueMap;
-
- // Move any instructions from fore phi operands from AftBlocks into Fore.
- moveHeaderPhiOperandsToForeBlocks(
- Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
- AftBlocks);
-
- // The current on-the-fly SSA update requires blocks to be processed in
- // reverse postorder so that LastValueMap contains the correct value at each
- // exit.
- LoopBlocksDFS DFS(L);
- DFS.perform(LI);
- // Stash the DFS iterators before adding blocks to the loop.
- LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
- LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
-
- if (Header->getParent()->isDebugInfoForProfiling())
- for (BasicBlock *BB : L->getBlocks())
- for (Instruction &I : *BB)
- if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
- if (NewDIL)
- I.setDebugLoc(NewDIL.getValue());
- else
- LLVM_DEBUG(dbgs()
- << "Failed to create new discriminator: "
- << DIL->getFilename() << " Line: " << DIL->getLine());
- }
-
- // Copy all blocks
- for (unsigned It = 1; It != Count; ++It) {
- std::vector<BasicBlock *> NewBlocks;
- // Maps Blocks[It] -> Blocks[It-1]
- DenseMap<Value *, Value *> PrevItValueMap;
-
- for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- ValueToValueMapTy VMap;
- BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
- Header->getParent()->getBasicBlockList().push_back(New);
-
- if (ForeBlocks.count(*BB)) {
- L->addBasicBlockToLoop(New, *LI);
-
- if (*BB == ForeBlocksFirst[0])
- ForeBlocksFirst.push_back(New);
- if (*BB == ForeBlocksLast[0])
- ForeBlocksLast.push_back(New);
- } else if (SubLoopBlocks.count(*BB)) {
- SubLoop->addBasicBlockToLoop(New, *LI);
-
- if (*BB == SubLoopBlocksFirst[0])
- SubLoopBlocksFirst.push_back(New);
- if (*BB == SubLoopBlocksLast[0])
- SubLoopBlocksLast.push_back(New);
- } else if (AftBlocks.count(*BB)) {
- L->addBasicBlockToLoop(New, *LI);
-
- if (*BB == AftBlocksFirst[0])
- AftBlocksFirst.push_back(New);
- if (*BB == AftBlocksLast[0])
- AftBlocksLast.push_back(New);
- } else {
- llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
- }
-
- // Update our running maps of newest clones
- PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
- LastValueMap[*BB] = New;
- for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
- VI != VE; ++VI) {
- PrevItValueMap[VI->second] =
- const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
- LastValueMap[VI->first] = VI->second;
- }
-
- NewBlocks.push_back(New);
-
- // Update DomTree:
- if (*BB == ForeBlocksFirst[0])
- DT->addNewBlock(New, ForeBlocksLast[It - 1]);
- else if (*BB == SubLoopBlocksFirst[0])
- DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
- else if (*BB == AftBlocksFirst[0])
- DT->addNewBlock(New, AftBlocksLast[It - 1]);
- else {
- // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
- // structure.
- auto BBDomNode = DT->getNode(*BB);
- auto BBIDom = BBDomNode->getIDom();
- BasicBlock *OriginalBBIDom = BBIDom->getBlock();
- assert(OriginalBBIDom);
- assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
- DT->addNewBlock(
- New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
- }
- }
-
- // Remap all instructions in the most recent iteration
- for (BasicBlock *NewBlock : NewBlocks) {
- for (Instruction &I : *NewBlock) {
- ::remapInstruction(&I, LastValueMap);
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- }
- }
-
- // Alter the ForeBlocks phi's, pointing them at the latest version of the
- // value from the previous iteration's phis
- for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
- Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
- assert(OldValue && "should have incoming edge from Aft[It]");
- Value *NewValue = OldValue;
- if (Value *PrevValue = PrevItValueMap[OldValue])
- NewValue = PrevValue;
-
- assert(Phi.getNumOperands() == 2);
- Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
- Phi.setIncomingValue(0, NewValue);
- Phi.removeIncomingValue(1);
- }
- }
-
- // Now that all the basic blocks for the unrolled iterations are in place,
- // finish up connecting the blocks and phi nodes. At this point LastValueMap
- // is the last unrolled iterations values.
-
- // Update Phis in BB from OldBB to point to NewBB
- auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
- BasicBlock *NewBB) {
- for (PHINode &Phi : BB->phis()) {
- int I = Phi.getBasicBlockIndex(OldBB);
- Phi.setIncomingBlock(I, NewBB);
- }
- };
- // Update Phis in BB from OldBB to point to NewBB and use the latest value
- // from LastValueMap
- auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
- BasicBlock *NewBB,
- ValueToValueMapTy &LastValueMap) {
- for (PHINode &Phi : BB->phis()) {
- for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
- if (Phi.getIncomingBlock(b) == OldBB) {
- Value *OldValue = Phi.getIncomingValue(b);
- if (Value *LastValue = LastValueMap[OldValue])
- Phi.setIncomingValue(b, LastValue);
- Phi.setIncomingBlock(b, NewBB);
- break;
- }
- }
- }
- };
- // Move all the phis from Src into Dest
- auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
- Instruction *insertPoint = Dest->getFirstNonPHI();
- while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
- Phi->moveBefore(insertPoint);
- };
-
- // Update the PHI values outside the loop to point to the last block
- updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
- LastValueMap);
-
- // Update ForeBlocks successors and phi nodes
- BranchInst *ForeTerm =
- cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
- BasicBlock *Dest = SubLoopBlocksFirst[0];
- ForeTerm->setSuccessor(0, Dest);
-
- if (CompletelyUnroll) {
- while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
- Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
- Phi->getParent()->getInstList().erase(Phi);
- }
- } else {
- // Update the PHI values to point to the last aft block
- updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
- AftBlocksLast.back(), LastValueMap);
- }
-
- for (unsigned It = 1; It != Count; It++) {
- // Remap ForeBlock successors from previous iteration to this
- BranchInst *ForeTerm =
- cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
- BasicBlock *Dest = ForeBlocksFirst[It];
- ForeTerm->setSuccessor(0, Dest);
- }
-
- // Subloop successors and phis
- BranchInst *SubTerm =
- cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
- SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
- SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
- updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
-
- for (unsigned It = 1; It != Count; It++) {
- // Replace the conditional branch of the previous iteration subloop with an
- // unconditional one to this one
- BranchInst *SubTerm =
- cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
- BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
- SubTerm->eraseFromParent();
-
- updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
- ForeBlocksLast.back());
- updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
- movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
- }
-
- // Aft blocks successors and phis
- BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
- if (CompletelyUnroll) {
- BranchInst::Create(LoopExit, Term);
- Term->eraseFromParent();
- } else {
- Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
- }
- updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
- SubLoopBlocksLast.back());
-
- for (unsigned It = 1; It != Count; It++) {
- // Replace the conditional branch of the previous iteration subloop with an
- // unconditional one to this one
- BranchInst *AftTerm =
- cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
- BranchInst::Create(AftBlocksFirst[It], AftTerm);
- AftTerm->eraseFromParent();
-
- updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
- SubLoopBlocksLast.back());
- movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
- }
-
- // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
- // new ones required.
- if (Count != 1) {
- SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
- DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
- SubLoopBlocksFirst[0]);
- DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
- SubLoopBlocksLast[0], AftBlocksFirst[0]);
-
- DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
- ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
- DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
- SubLoopBlocksLast.back(), AftBlocksFirst[0]);
- DT->applyUpdates(DTUpdates);
- }
-
- // Merge adjacent basic blocks, if possible.
- SmallPtrSet<BasicBlock *, 16> MergeBlocks;
- MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
- MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
- MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- while (!MergeBlocks.empty()) {
- BasicBlock *BB = *MergeBlocks.begin();
- BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
- if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
- BasicBlock *Dest = Term->getSuccessor(0);
- BasicBlock *Fold = Dest->getUniquePredecessor();
- if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
- // Don't remove BB and add Fold as they are the same BB
- assert(Fold == BB);
- (void)Fold;
- MergeBlocks.erase(Dest);
- } else
- MergeBlocks.erase(BB);
- } else
- MergeBlocks.erase(BB);
- }
-
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
- simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
-
- NumCompletelyUnrolledAndJammed += CompletelyUnroll;
- ++NumUnrolledAndJammed;
-
-#ifndef NDEBUG
- // We shouldn't have done anything to break loop simplify form or LCSSA.
- Loop *OuterL = L->getParentLoop();
- Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
- assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
- if (!CompletelyUnroll)
- assert(L->isLoopSimplifyForm());
- assert(SubLoop->isLoopSimplifyForm());
- assert(DT->verify());
-#endif
-
- // Update LoopInfo if the loop is completely removed.
- if (CompletelyUnroll)
- LI->erase(L);
-
- return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
- : LoopUnrollResult::PartiallyUnrolled;
-}
-
-static bool getLoadsAndStores(BasicBlockSet &Blocks,
- SmallVector<Value *, 4> &MemInstr) {
- // Scan the BBs and collect legal loads and stores.
- // Returns false if non-simple loads/stores are found.
- for (BasicBlock *BB : Blocks) {
- for (Instruction &I : *BB) {
- if (auto *Ld = dyn_cast<LoadInst>(&I)) {
- if (!Ld->isSimple())
- return false;
- MemInstr.push_back(&I);
- } else if (auto *St = dyn_cast<StoreInst>(&I)) {
- if (!St->isSimple())
- return false;
- MemInstr.push_back(&I);
- } else if (I.mayReadOrWriteMemory()) {
- return false;
- }
- }
- }
- return true;
-}
-
-static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
- SmallVector<Value *, 4> &Later,
- unsigned LoopDepth, bool InnerLoop,
- DependenceInfo &DI) {
- // Use DA to check for dependencies between loads and stores that make unroll
- // and jam invalid
- for (Value *I : Earlier) {
- for (Value *J : Later) {
- Instruction *Src = cast<Instruction>(I);
- Instruction *Dst = cast<Instruction>(J);
- if (Src == Dst)
- continue;
- // Ignore Input dependencies.
- if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
- continue;
-
- // Track dependencies, and if we find them take a conservative approach
- // by allowing only = or < (not >), altough some > would be safe
- // (depending upon unroll width).
- // For the inner loop, we need to disallow any (> <) dependencies
- // FIXME: Allow > so long as distance is less than unroll width
- if (auto D = DI.depends(Src, Dst, true)) {
- assert(D->isOrdered() && "Expected an output, flow or anti dep.");
-
- if (D->isConfused()) {
- LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
- return false;
- }
- if (!InnerLoop) {
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
- LLVM_DEBUG(dbgs() << " > dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
- return false;
- }
- } else {
- assert(LoopDepth + 1 <= D->getLevels());
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
- D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
- LLVM_DEBUG(dbgs() << " < > dependency between:\n"
- << " " << *Src << "\n"
- << " " << *Dst << "\n");
- return false;
- }
- }
- }
- }
- }
- return true;
-}
-
-static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
- BasicBlockSet &SubLoopBlocks,
- BasicBlockSet &AftBlocks, DependenceInfo &DI) {
- // Get all loads/store pairs for each blocks
- SmallVector<Value *, 4> ForeMemInstr;
- SmallVector<Value *, 4> SubLoopMemInstr;
- SmallVector<Value *, 4> AftMemInstr;
- if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
- !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
- !getLoadsAndStores(AftBlocks, AftMemInstr))
- return false;
-
- // Check for dependencies between any blocks that may change order
- unsigned LoopDepth = L->getLoopDepth();
- return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
- DI) &&
- checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
- checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
- DI) &&
- checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
- DI);
-}
-
-bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
- DependenceInfo &DI) {
- /* We currently handle outer loops like this:
- |
- ForeFirst <----\ }
- Blocks | } ForeBlocks
- ForeLast | }
- | |
- SubLoopFirst <\ | }
- Blocks | | } SubLoopBlocks
- SubLoopLast -/ | }
- | |
- AftFirst | }
- Blocks | } AftBlocks
- AftLast ------/ }
- |
-
- There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
- and AftBlocks, providing that there is one edge from Fores to SubLoops,
- one edge from SubLoops to Afts and a single outer loop exit (from Afts).
- In practice we currently limit Aft blocks to a single block, and limit
- things further in the profitablility checks of the unroll and jam pass.
-
- Because of the way we rearrange basic blocks, we also require that
- the Fore blocks on all unrolled iterations are safe to move before the
- SubLoop blocks of all iterations. So we require that the phi node looping
- operands of ForeHeader can be moved to at least the end of ForeEnd, so that
- we can arrange cloned Fore Blocks before the subloop and match up Phi's
- correctly.
-
- i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
- It needs to be safe to tranform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
-
- There are then a number of checks along the lines of no calls, no
- exceptions, inner loop IV is consistent, etc. Note that for loops requiring
- runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
- UnrollAndJamLoop if the trip count cannot be easily calculated.
- */
-
- if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
- return false;
- Loop *SubLoop = L->getSubLoops()[0];
- if (!SubLoop->isLoopSimplifyForm())
- return false;
-
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Exit = L->getExitingBlock();
- BasicBlock *SubLoopHeader = SubLoop->getHeader();
- BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
- BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
-
- if (Latch != Exit)
- return false;
- if (SubLoopLatch != SubLoopExit)
- return false;
-
- if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
- return false;
- }
-
- // Split blocks into Fore/SubLoop/Aft based on dominators
- BasicBlockSet SubLoopBlocks;
- BasicBlockSet ForeBlocks;
- BasicBlockSet AftBlocks;
- if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
- AftBlocks, &DT)) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
- return false;
- }
-
- // Aft blocks may need to move instructions to fore blocks, which becomes more
- // difficult if there are multiple (potentially conditionally executed)
- // blocks. For now we just exclude loops with multiple aft blocks.
- if (AftBlocks.size() != 1) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
- "multiple blocks after the loop\n");
- return false;
- }
-
- // Check inner loop backedge count is consistent on all iterations of the
- // outer loop
- if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
- "not consistent on each iteration\n");
- return false;
- }
-
- // Check the loop safety info for exceptions.
- SimpleLoopSafetyInfo LSI;
- LSI.computeLoopSafetyInfo(L);
- if (LSI.anyBlockMayThrow()) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
- return false;
- }
-
- // We've ruled out the easy stuff and now need to check that there are no
- // interdependencies which may prevent us from moving the:
- // ForeBlocks before Subloop and AftBlocks.
- // Subloop before AftBlocks.
- // ForeBlock phi operands before the subloop
-
- // Make sure we can move all instructions we need to before the subloop
- if (!processHeaderPhiOperands(
- Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
- if (SubLoop->contains(I->getParent()))
- return false;
- if (AftBlocks.count(I->getParent())) {
- // If we hit a phi node in afts we know we are done (probably
- // LCSSA)
- if (isa<PHINode>(I))
- return false;
- // Can't move instructions with side effects or memory
- // reads/writes
- if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
- return false;
- }
- // Keep going
- return true;
- })) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
- "instructions after subloop to before it\n");
- return false;
- }
-
- // Check for memory dependencies which prohibit the unrolling we are doing.
- // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
- // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
- LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
- return false;
- }
-
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
deleted file mode 100644
index 005306cf1898..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ /dev/null
@@ -1,744 +0,0 @@
-//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements some loop unrolling utilities for peeling loops
-// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
-// unrolling loops with compile-time constant trip counts.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <limits>
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-#define DEBUG_TYPE "loop-unroll"
-
-STATISTIC(NumPeeled, "Number of loops peeled");
-
-static cl::opt<unsigned> UnrollPeelMaxCount(
- "unroll-peel-max-count", cl::init(7), cl::Hidden,
- cl::desc("Max average trip count which will cause loop peeling."));
-
-static cl::opt<unsigned> UnrollForcePeelCount(
- "unroll-force-peel-count", cl::init(0), cl::Hidden,
- cl::desc("Force a peel count regardless of profiling information."));
-
-static cl::opt<bool> UnrollPeelMultiDeoptExit(
- "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
- cl::desc("Allow peeling of loops with multiple deopt exits."));
-
-// Designates that a Phi is estimated to become invariant after an "infinite"
-// number of loop iterations (i.e. only may become an invariant if the loop is
-// fully unrolled).
-static const unsigned InfiniteIterationsToInvariance =
- std::numeric_limits<unsigned>::max();
-
-// Check whether we are capable of peeling this loop.
-bool llvm::canPeel(Loop *L) {
- // Make sure the loop is in simplified form
- if (!L->isLoopSimplifyForm())
- return false;
-
- if (UnrollPeelMultiDeoptExit) {
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueNonLatchExitBlocks(Exits);
-
- if (!Exits.empty()) {
- // Latch's terminator is a conditional branch, Latch is exiting and
- // all non Latch exits ends up with deoptimize.
- const BasicBlock *Latch = L->getLoopLatch();
- const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
- return T && T->isConditional() && L->isLoopExiting(Latch) &&
- all_of(Exits, [](const BasicBlock *BB) {
- return BB->getTerminatingDeoptimizeCall();
- });
- }
- }
-
- // Only peel loops that contain a single exit
- if (!L->getExitingBlock() || !L->getUniqueExitBlock())
- return false;
-
- // Don't try to peel loops where the latch is not the exiting block.
- // This can be an indication of two different things:
- // 1) The loop is not rotated.
- // 2) The loop contains irreducible control flow that involves the latch.
- if (L->getLoopLatch() != L->getExitingBlock())
- return false;
-
- return true;
-}
-
-// This function calculates the number of iterations after which the given Phi
-// becomes an invariant. The pre-calculated values are memorized in the map. The
-// function (shortcut is I) is calculated according to the following definition:
-// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
-// If %y is a loop invariant, then I(%x) = 1.
-// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
-// Otherwise, I(%x) is infinite.
-// TODO: Actually if %y is an expression that depends only on Phi %z and some
-// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
-// looks like:
-// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
-// %y = phi(0, 5),
-// %a = %y + 1.
-static unsigned calculateIterationsToInvariance(
- PHINode *Phi, Loop *L, BasicBlock *BackEdge,
- SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
- assert(Phi->getParent() == L->getHeader() &&
- "Non-loop Phi should not be checked for turning into invariant.");
- assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
- // If we already know the answer, take it from the map.
- auto I = IterationsToInvariance.find(Phi);
- if (I != IterationsToInvariance.end())
- return I->second;
-
- // Otherwise we need to analyze the input from the back edge.
- Value *Input = Phi->getIncomingValueForBlock(BackEdge);
- // Place infinity to map to avoid infinite recursion for cycled Phis. Such
- // cycles can never stop on an invariant.
- IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
- unsigned ToInvariance = InfiniteIterationsToInvariance;
-
- if (L->isLoopInvariant(Input))
- ToInvariance = 1u;
- else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
- // Only consider Phis in header block.
- if (IncPhi->getParent() != L->getHeader())
- return InfiniteIterationsToInvariance;
- // If the input becomes an invariant after X iterations, then our Phi
- // becomes an invariant after X + 1 iterations.
- unsigned InputToInvariance = calculateIterationsToInvariance(
- IncPhi, L, BackEdge, IterationsToInvariance);
- if (InputToInvariance != InfiniteIterationsToInvariance)
- ToInvariance = InputToInvariance + 1u;
- }
-
- // If we found that this Phi lies in an invariant chain, update the map.
- if (ToInvariance != InfiniteIterationsToInvariance)
- IterationsToInvariance[Phi] = ToInvariance;
- return ToInvariance;
-}
-
-// Return the number of iterations to peel off that make conditions in the
-// body true/false. For example, if we peel 2 iterations off the loop below,
-// the condition i < 2 can be evaluated at compile time.
-// for (i = 0; i < n; i++)
-// if (i < 2)
-// ..
-// else
-// ..
-// }
-static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
- ScalarEvolution &SE) {
- assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
- unsigned DesiredPeelCount = 0;
-
- for (auto *BB : L.blocks()) {
- auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || BI->isUnconditional())
- continue;
-
- // Ignore loop exit condition.
- if (L.getLoopLatch() == BB)
- continue;
-
- Value *Condition = BI->getCondition();
- Value *LeftVal, *RightVal;
- CmpInst::Predicate Pred;
- if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
- continue;
-
- const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
- const SCEV *RightSCEV = SE.getSCEV(RightVal);
-
- // Do not consider predicates that are known to be true or false
- // independently of the loop iteration.
- if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
- SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
- RightSCEV))
- continue;
-
- // Check if we have a condition with one AddRec and one non AddRec
- // expression. Normalize LeftSCEV to be the AddRec.
- if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
- if (isa<SCEVAddRecExpr>(RightSCEV)) {
- std::swap(LeftSCEV, RightSCEV);
- Pred = ICmpInst::getSwappedPredicate(Pred);
- } else
- continue;
- }
-
- const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
-
- // Avoid huge SCEV computations in the loop below, make sure we only
- // consider AddRecs of the loop we are trying to peel and avoid
- // non-monotonic predicates, as we will not be able to simplify the loop
- // body.
- // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
- // simplify the loop, if we peel 1 additional iteration, if there
- // is no wrapping.
- bool Increasing;
- if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
- !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
- continue;
- (void)Increasing;
-
- // Check if extending the current DesiredPeelCount lets us evaluate Pred
- // or !Pred in the loop body statically.
- unsigned NewPeelCount = DesiredPeelCount;
-
- const SCEV *IterVal = LeftAR->evaluateAtIteration(
- SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
-
- // If the original condition is not known, get the negated predicate
- // (which holds on the else branch) and check if it is known. This allows
- // us to peel of iterations that make the original condition false.
- if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
- Pred = ICmpInst::getInversePredicate(Pred);
-
- const SCEV *Step = LeftAR->getStepRecurrence(SE);
- while (NewPeelCount < MaxPeelCount &&
- SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
- IterVal = SE.getAddExpr(IterVal, Step);
- NewPeelCount++;
- }
-
- // Only peel the loop if the monotonic predicate !Pred becomes known in the
- // first iteration of the loop body after peeling.
- if (NewPeelCount > DesiredPeelCount &&
- SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
- RightSCEV))
- DesiredPeelCount = NewPeelCount;
- }
-
- return DesiredPeelCount;
-}
-
-// Return the number of iterations we want to peel off.
-void llvm::computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &TripCount, ScalarEvolution &SE) {
- assert(LoopSize > 0 && "Zero loop size is not allowed!");
- // Save the UP.PeelCount value set by the target in
- // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
- unsigned TargetPeelCount = UP.PeelCount;
- UP.PeelCount = 0;
- if (!canPeel(L))
- return;
-
- // Only try to peel innermost loops.
- if (!L->empty())
- return;
-
- // If the user provided a peel count, use that.
- bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
- if (UserPeelCount) {
- LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
- << " iterations.\n");
- UP.PeelCount = UnrollForcePeelCount;
- return;
- }
-
- // Skip peeling if it's disabled.
- if (!UP.AllowPeeling)
- return;
-
- // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
- // iterations of the loop. For this we compute the number for iterations after
- // which every Phi is guaranteed to become an invariant, and try to peel the
- // maximum number of iterations among these values, thus turning all those
- // Phis into invariants.
- // First, check that we can peel at least one iteration.
- if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
- // Store the pre-calculated values here.
- SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
- // Now go through all Phis to calculate their the number of iterations they
- // need to become invariants.
- // Start the max computation with the UP.PeelCount value set by the target
- // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
- unsigned DesiredPeelCount = TargetPeelCount;
- BasicBlock *BackEdge = L->getLoopLatch();
- assert(BackEdge && "Loop is not in simplified form?");
- for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
- PHINode *Phi = cast<PHINode>(&*BI);
- unsigned ToInvariance = calculateIterationsToInvariance(
- Phi, L, BackEdge, IterationsToInvariance);
- if (ToInvariance != InfiniteIterationsToInvariance)
- DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
- }
-
- // Pay respect to limitations implied by loop size and the max peel count.
- unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
-
- DesiredPeelCount = std::max(DesiredPeelCount,
- countToEliminateCompares(*L, MaxPeelCount, SE));
-
- if (DesiredPeelCount > 0) {
- DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
- // Consider max peel count limitation.
- assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
- << " iteration(s) to turn"
- << " some Phis into invariants.\n");
- UP.PeelCount = DesiredPeelCount;
- return;
- }
- }
-
- // Bail if we know the statically calculated trip count.
- // In this case we rather prefer partial unrolling.
- if (TripCount)
- return;
-
- // If we don't know the trip count, but have reason to believe the average
- // trip count is low, peeling should be beneficial, since we will usually
- // hit the peeled section.
- // We only do this in the presence of profile information, since otherwise
- // our estimates of the trip count are not reliable enough.
- if (L->getHeader()->getParent()->hasProfileData()) {
- Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
- if (!PeelCount)
- return;
-
- LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
- << "\n");
-
- if (*PeelCount) {
- if ((*PeelCount <= UnrollPeelMaxCount) &&
- (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
- LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
- << " iterations.\n");
- UP.PeelCount = *PeelCount;
- return;
- }
- LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
- LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
- LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
- << "\n");
- LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
- }
- }
-}
-
-/// Update the branch weights of the latch of a peeled-off loop
-/// iteration.
-/// This sets the branch weights for the latch of the recently peeled off loop
-/// iteration correctly.
-/// Our goal is to make sure that:
-/// a) The total weight of all the copies of the loop body is preserved.
-/// b) The total weight of the loop exit is preserved.
-/// c) The body weight is reasonably distributed between the peeled iterations.
-///
-/// \param Header The copy of the header block that belongs to next iteration.
-/// \param LatchBR The copy of the latch branch that belongs to this iteration.
-/// \param IterNumber The serial number of the iteration that was just
-/// peeled off.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
-/// iterations that are unaccounted for. As an input, it represents the number
-/// of times we expect to enter the header of the iteration currently being
-/// peeled off. The output is the number of times we expect to enter the
-/// header of the next iteration.
-static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned IterNumber, unsigned AvgIters,
- uint64_t &PeeledHeaderWeight) {
- if (!PeeledHeaderWeight)
- return;
- // FIXME: Pick a more realistic distribution.
- // Currently the proportion of weight we assign to the fall-through
- // side of the branch drops linearly with the iteration number, and we use
- // a 0.9 fudge factor to make the drop-off less sharp...
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
-
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-}
-
-/// Initialize the weights.
-///
-/// \param Header The header block.
-/// \param LatchBR The latch branch.
-/// \param AvgIters The average number of iterations we expect the loop to have.
-/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
-/// \param[out] CurHeaderWeight The # of times the header is executed.
-static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- unsigned AvgIters, uint64_t &ExitWeight,
- uint64_t &CurHeaderWeight) {
- uint64_t TrueWeight, FalseWeight;
- if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
- return;
- unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
- ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // is taken and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
-}
-
-/// Update the weights of original Latch block after peeling off all iterations.
-///
-/// \param Header The header block.
-/// \param LatchBR The latch branch.
-/// \param ExitWeight The weight of the edge from Latch to Exit block.
-/// \param CurHeaderWeight The # of time the header is executed.
-static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
- uint64_t ExitWeight, uint64_t CurHeaderWeight) {
- // Adjust the branch weights on the loop exit.
- if (!ExitWeight)
- return;
-
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
- MDBuilder MDB(LatchBR->getContext());
- unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-}
-
-/// Clones the body of the loop L, putting it between \p InsertTop and \p
-/// InsertBot.
-/// \param IterNumber The serial number of the iteration currently being
-/// peeled off.
-/// \param ExitEdges The exit edges of the original loop.
-/// \param[out] NewBlocks A list of the blocks in the newly created clone
-/// \param[out] VMap The value map between the loop and the new clone.
-/// \param LoopBlocks A helper for DFS-traversal of the loop.
-/// \param LVMap A value-map that maps instructions from the original loop to
-/// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(
- Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
- SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
- SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
- ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *PreHeader = L->getLoopPreheader();
-
- Function *F = Header->getParent();
- LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
- LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *ParentLoop = L->getParentLoop();
-
- // For each block in the original loop, create a new copy,
- // and update the value map with the newly created values.
- for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
- NewBlocks.push_back(NewBB);
-
- if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, *LI);
-
- VMap[*BB] = NewBB;
-
- // If dominator tree is available, insert nodes to represent cloned blocks.
- if (DT) {
- if (Header == *BB)
- DT->addNewBlock(NewBB, InsertTop);
- else {
- DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
- // VMap must contain entry for IDom, as the iteration order is RPO.
- DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
- }
- }
- }
-
- // Hook-up the control flow for the newly inserted blocks.
- // The new header is hooked up directly to the "top", which is either
- // the original loop preheader (for the first iteration) or the previous
- // iteration's exiting block (for every other iteration)
- InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
-
- // Similarly, for the latch:
- // The original exiting edge is still hooked up to the loop exit.
- // The backedge now goes to the "bottom", which is either the loop's real
- // header (for the last peeled iteration) or the copied header of the next
- // iteration (for every other iteration)
- BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
- BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
- for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
- if (LatchBR->getSuccessor(idx) == Header) {
- LatchBR->setSuccessor(idx, InsertBot);
- break;
- }
- if (DT)
- DT->changeImmediateDominator(InsertBot, NewLatch);
-
- // The new copy of the loop body starts with a bunch of PHI nodes
- // that pick an incoming value from either the preheader, or the previous
- // loop iteration. Since this copy is no longer part of the loop, we
- // resolve this statically:
- // For the first iteration, we use the value from the preheader directly.
- // For any other iteration, we replace the phi with the value generated by
- // the immediately preceding clone of the loop body (which represents
- // the previous iteration).
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (IterNumber == 0) {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
- } else {
- Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
- Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
- if (LatchInst && L->contains(LatchInst))
- VMap[&*I] = LVMap[LatchInst];
- else
- VMap[&*I] = LatchVal;
- }
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
- }
-
- // Fix up the outgoing values - we need to add a value for the iteration
- // we've just created. Note that this must happen *after* the incoming
- // values are adjusted, since the value going out of the latch may also be
- // a value coming into the header.
- for (auto Edge : ExitEdges)
- for (PHINode &PHI : Edge.second->phis()) {
- Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
- Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
- if (LatchInst && L->contains(LatchInst))
- LatchVal = VMap[LatchVal];
- PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
- }
-
- // LastValueMap is updated with the values for the current loop
- // which are used the next time this function is called.
- for (const auto &KV : VMap)
- LVMap[KV.first] = KV.second;
-}
-
-/// Peel off the first \p PeelCount iterations of loop \p L.
-///
-/// Note that this does not peel them off as a single straight-line block.
-/// Rather, each iteration is peeled off separately, and needs to check the
-/// exit condition.
-/// For loops that dynamically execute \p PeelCount iterations or less
-/// this provides a benefit, since the peeled off iterations, which account
-/// for the bulk of dynamic execution, can be further simplified by scalar
-/// optimizations.
-bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, bool PreserveLCSSA) {
- assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
- assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
-
- LoopBlocksDFS LoopBlocks(L);
- LoopBlocks.perform(LI);
-
- BasicBlock *Header = L->getHeader();
- BasicBlock *PreHeader = L->getLoopPreheader();
- BasicBlock *Latch = L->getLoopLatch();
- SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
- L->getExitEdges(ExitEdges);
-
- DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
- if (DT) {
- assert(L->hasDedicatedExits() && "No dedicated exits?");
- for (auto Edge : ExitEdges) {
- if (ExitIDom.count(Edge.second))
- continue;
- BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
- assert(L->contains(BB) && "IDom is not in a loop");
- ExitIDom[Edge.second] = BB;
- }
- }
-
- Function *F = Header->getParent();
-
- // Set up all the necessary basic blocks. It is convenient to split the
- // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
- // body, and a new preheader for the "real" loop.
-
- // Peeling the first iteration transforms.
- //
- // PreHeader:
- // ...
- // Header:
- // LoopBody
- // If (cond) goto Header
- // Exit:
- //
- // into
- //
- // InsertTop:
- // LoopBody
- // If (!cond) goto Exit
- // InsertBot:
- // NewPreHeader:
- // ...
- // Header:
- // LoopBody
- // If (cond) goto Header
- // Exit:
- //
- // Each following iteration will split the current bottom anchor in two,
- // and put the new copy of the loop body between these two blocks. That is,
- // after peeling another iteration from the example above, we'll split
- // InsertBot, and get:
- //
- // InsertTop:
- // LoopBody
- // If (!cond) goto Exit
- // InsertBot:
- // LoopBody
- // If (!cond) goto Exit
- // InsertBot.next:
- // NewPreHeader:
- // ...
- // Header:
- // LoopBody
- // If (cond) goto Header
- // Exit:
-
- BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
- BasicBlock *InsertBot =
- SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
- BasicBlock *NewPreHeader =
- SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
-
- InsertTop->setName(Header->getName() + ".peel.begin");
- InsertBot->setName(Header->getName() + ".peel.next");
- NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
-
- ValueToValueMapTy LVMap;
-
- // If we have branch weight information, we'll want to update it for the
- // newly created branches.
- BranchInst *LatchBR =
- cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
-
- // For each peeled-off iteration, make a copy of the loop.
- for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
- SmallVector<BasicBlock *, 8> NewBlocks;
- ValueToValueMapTy VMap;
-
- // Subtract the exit weight from the current header weight -- the exit
- // weight is exactly the weight of the previous iteration's header.
- // FIXME: due to the way the distribution is constructed, we need a
- // guard here to make sure we don't end up with non-positive weights.
- if (ExitWeight < CurHeaderWeight)
- CurHeaderWeight -= ExitWeight;
- else
- CurHeaderWeight = 1;
-
- cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI);
-
- // Remap to use values from the current iteration instead of the
- // previous one.
- remapInstructionsInBlocks(NewBlocks, VMap);
-
- if (DT) {
- // Latches of the cloned loops dominate over the loop exit, so idom of the
- // latter is the first cloned loop body, as original PreHeader dominates
- // the original loop body.
- if (Iter == 0)
- for (auto Exit : ExitIDom)
- DT->changeImmediateDominator(Exit.first,
- cast<BasicBlock>(LVMap[Exit.second]));
-#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-#endif
- }
-
- auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
- updateBranchWeights(InsertBot, LatchBRCopy, Iter,
- PeelCount, ExitWeight);
- // Remove Loop metadata from the latch branch instruction
- // because it is not the Loop's latch branch anymore.
- LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
-
- InsertTop = InsertBot;
- InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
- InsertBot->setName(Header->getName() + ".peel.next");
-
- F->getBasicBlockList().splice(InsertTop->getIterator(),
- F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
- }
-
- // Now adjust the phi nodes in the loop header to get their initial values
- // from the last peeled-off iteration instead of the preheader.
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PHI = cast<PHINode>(I);
- Value *NewVal = PHI->getIncomingValueForBlock(Latch);
- Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
- if (LatchInst && L->contains(LatchInst))
- NewVal = LVMap[LatchInst];
-
- PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
- }
-
- fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
-
- if (Loop *ParentLoop = L->getParentLoop())
- L = ParentLoop;
-
- // We modified the loop, update SE.
- SE->forgetTopmostLoop(L);
-
- // Finally DomtTree must be correct.
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
-
- NumPeeled++;
-
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
deleted file mode 100644
index d22fdb4d52dc..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ /dev/null
@@ -1,958 +0,0 @@
-//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements some loop unrolling utilities for loops with run-time
-// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
-// trip counts.
-//
-// The functions in this file are used to generate extra code when the
-// run-time trip count modulo the unroll factor is not 0. When this is the
-// case, we need to generate code to execute these 'left over' iterations.
-//
-// The current strategy generates an if-then-else sequence prior to the
-// unrolled loop to execute the 'left over' iterations before or after the
-// unrolled loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <algorithm>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unroll"
-
-STATISTIC(NumRuntimeUnrolled,
- "Number of loops unrolled with run-time trip counts");
-static cl::opt<bool> UnrollRuntimeMultiExit(
- "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
- cl::desc("Allow runtime unrolling for loops with multiple exits, when "
- "epilog is generated"));
-
-/// Connect the unrolling prolog code to the original loop.
-/// The unrolling prolog code contains code to execute the
-/// 'extra' iterations if the run-time trip count modulo the
-/// unroll count is non-zero.
-///
-/// This function performs the following:
-/// - Create PHI nodes at prolog end block to combine values
-/// that exit the prolog code and jump around the prolog.
-/// - Add a PHI operand to a PHI node at the loop exit block
-/// for values that exit the prolog and go around the loop.
-/// - Branch around the original loop if the trip count is less
-/// than the unroll factor.
-///
-static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *PrologExit,
- BasicBlock *OriginalLoopLatchExit,
- BasicBlock *PreHeader, BasicBlock *NewPreHeader,
- ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
- // Loop structure should be the following:
- // Preheader
- // PrologHeader
- // ...
- // PrologLatch
- // PrologExit
- // NewPreheader
- // Header
- // ...
- // Latch
- // LatchExit
- BasicBlock *Latch = L->getLoopLatch();
- assert(Latch && "Loop must have a latch");
- BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
-
- // Create a PHI node for each outgoing value from the original loop
- // (which means it is an outgoing value from the prolog code too).
- // The new PHI node is inserted in the prolog end basic block.
- // The new PHI node value is added as an operand of a PHI node in either
- // the loop header or the loop exit block.
- for (BasicBlock *Succ : successors(Latch)) {
- for (PHINode &PN : Succ->phis()) {
- // Add a new PHI node to the prolog end block and add the
- // appropriate incoming values.
- // TODO: This code assumes that the PrologExit (or the LatchExit block for
- // prolog loop) contains only one predecessor from the loop, i.e. the
- // PrologLatch. When supporting multiple-exiting block loops, we can have
- // two or more blocks that have the LatchExit as the target in the
- // original loop.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- PrologExit->getFirstNonPHI());
- // Adding a value to the new PHI node from the original loop preheader.
- // This is the value that skips all the prolog code.
- if (L->contains(&PN)) {
- // Succ is loop header.
- NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
- PreHeader);
- } else {
- // Succ is LatchExit.
- NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
- }
-
- Value *V = PN.getIncomingValueForBlock(Latch);
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (L->contains(I)) {
- V = VMap.lookup(I);
- }
- }
- // Adding a value to the new PHI node from the last prolog block
- // that was created.
- NewPN->addIncoming(V, PrologLatch);
-
- // Update the existing PHI node operand with the value from the
- // new PHI node. How this is done depends on if the existing
- // PHI node is in the original loop block, or the exit block.
- if (L->contains(&PN))
- PN.setIncomingValueForBlock(NewPreHeader, NewPN);
- else
- PN.addIncoming(NewPN, PrologExit);
- }
- }
-
- // Make sure that created prolog loop is in simplified form
- SmallVector<BasicBlock *, 4> PrologExitPreds;
- Loop *PrologLoop = LI->getLoopFor(PrologLatch);
- if (PrologLoop) {
- for (BasicBlock *PredBB : predecessors(PrologExit))
- if (PrologLoop->contains(PredBB))
- PrologExitPreds.push_back(PredBB);
-
- SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
- nullptr, PreserveLCSSA);
- }
-
- // Create a branch around the original loop, which is taken if there are no
- // iterations remaining to be executed after running the prologue.
- Instruction *InsertPt = PrologExit->getTerminator();
- IRBuilder<> B(InsertPt);
-
- assert(Count != 0 && "nonsensical Count!");
-
- // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
- // This means %xtraiter is (BECount + 1) and all of the iterations of this
- // loop were executed by the prologue. Note that if BECount <u (Count - 1)
- // then (BECount + 1) cannot unsigned-overflow.
- Value *BrLoopExit =
- B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
- // Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
- SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
- nullptr, PreserveLCSSA);
- // Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
- InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
-}
-
-/// Connect the unrolling epilog code to the original loop.
-/// The unrolling epilog code contains code to execute the
-/// 'extra' iterations if the run-time trip count modulo the
-/// unroll count is non-zero.
-///
-/// This function performs the following:
-/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
-/// - Create PHI nodes at the unrolling loop exit to combine
-/// values that exit the unrolling loop code and jump around it.
-/// - Update PHI operands in the epilog loop by the new PHI nodes
-/// - Branch around the epilog loop if extra iters (ModVal) is zero.
-///
-static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
- BasicBlock *Exit, BasicBlock *PreHeader,
- BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
- ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
- BasicBlock *Latch = L->getLoopLatch();
- assert(Latch && "Loop must have a latch");
- BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
-
- // Loop structure should be the following:
- //
- // PreHeader
- // NewPreHeader
- // Header
- // ...
- // Latch
- // NewExit (PN)
- // EpilogPreHeader
- // EpilogHeader
- // ...
- // EpilogLatch
- // Exit (EpilogPN)
-
- // Update PHI nodes at NewExit and Exit.
- for (PHINode &PN : NewExit->phis()) {
- // PN should be used in another PHI located in Exit block as
- // Exit was split by SplitBlockPredecessors into Exit and NewExit
- // Basicaly it should look like:
- // NewExit:
- // PN = PHI [I, Latch]
- // ...
- // Exit:
- // EpilogPN = PHI [PN, EpilogPreHeader]
- //
- // There is EpilogPreHeader incoming block instead of NewExit as
- // NewExit was spilt 1 more time to get EpilogPreHeader.
- assert(PN.hasOneUse() && "The phi should have 1 use");
- PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
- assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
-
- // Add incoming PreHeader from branch around the Loop
- PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
-
- Value *V = PN.getIncomingValueForBlock(Latch);
- Instruction *I = dyn_cast<Instruction>(V);
- if (I && L->contains(I))
- // If value comes from an instruction in the loop add VMap value.
- V = VMap.lookup(I);
- // For the instruction out of the loop, constant or undefined value
- // insert value itself.
- EpilogPN->addIncoming(V, EpilogLatch);
-
- assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
- "EpilogPN should have EpilogPreHeader incoming block");
- // Change EpilogPreHeader incoming block to NewExit.
- EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
- NewExit);
- // Now PHIs should look like:
- // NewExit:
- // PN = PHI [I, Latch], [undef, PreHeader]
- // ...
- // Exit:
- // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
- }
-
- // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
- // Update corresponding PHI nodes in epilog loop.
- for (BasicBlock *Succ : successors(Latch)) {
- // Skip this as we already updated phis in exit blocks.
- if (!L->contains(Succ))
- continue;
- for (PHINode &PN : Succ->phis()) {
- // Add new PHI nodes to the loop exit block and update epilog
- // PHIs with the new PHI values.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
- NewExit->getFirstNonPHI());
- // Adding a value to the new PHI node from the unrolling loop preheader.
- NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
- // Adding a value to the new PHI node from the unrolling loop latch.
- NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
-
- // Update the existing PHI node operand with the value from the new PHI
- // node. Corresponding instruction in epilog loop should be PHI.
- PHINode *VPN = cast<PHINode>(VMap[&PN]);
- VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
- }
- }
-
- Instruction *InsertPt = NewExit->getTerminator();
- IRBuilder<> B(InsertPt);
- Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
- assert(Exit && "Loop must have a single exit block only");
- // Split the epilogue exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
- PreserveLCSSA);
- // Add the branch to the exit block (around the unrolling loop)
- B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
- InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(Exit, NewExit);
-
- // Split the main loop exit to maintain canonicalization guarantees.
- SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
- SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
- PreserveLCSSA);
-}
-
-/// Create a clone of the blocks in a loop and connect them together.
-/// If CreateRemainderLoop is false, loop structure will not be cloned,
-/// otherwise a new loop will be created including all cloned blocks, and the
-/// iterator of it switches to count NewIter down to 0.
-/// The cloned blocks should be inserted between InsertTop and InsertBot.
-/// If loop structure is cloned InsertTop should be new preheader, InsertBot
-/// new loop exit.
-/// Return the new cloned loop that is created when CreateRemainderLoop is true.
-static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, const bool UnrollRemainder,
- BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
- ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
- StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- Function *F = Header->getParent();
- LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
- LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *ParentLoop = L->getParentLoop();
- NewLoopsMap NewLoops;
- NewLoops[ParentLoop] = ParentLoop;
- if (!CreateRemainderLoop)
- NewLoops[L] = ParentLoop;
-
- // For each block in the original loop, create a new copy,
- // and update the value map with the newly created values.
- for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
- NewBlocks.push_back(NewBB);
-
- // If we're unrolling the outermost loop, there's no remainder loop,
- // and this block isn't in a nested loop, then the new block is not
- // in any loop. Otherwise, add it to loopinfo.
- if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
- addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
-
- VMap[*BB] = NewBB;
- if (Header == *BB) {
- // For the first block, add a CFG connection to this newly
- // created block.
- InsertTop->getTerminator()->setSuccessor(0, NewBB);
- }
-
- if (DT) {
- if (Header == *BB) {
- // The header is dominated by the preheader.
- DT->addNewBlock(NewBB, InsertTop);
- } else {
- // Copy information from original loop to unrolled loop.
- BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
- DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
- }
- }
-
- if (Latch == *BB) {
- // For the last block, if CreateRemainderLoop is false, create a direct
- // jump to InsertBot. If not, create a loop back to cloned head.
- VMap.erase((*BB)->getTerminator());
- BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
- BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
- IRBuilder<> Builder(LatchBR);
- if (!CreateRemainderLoop) {
- Builder.CreateBr(InsertBot);
- } else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
- Value *IdxSub =
- Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".sub");
- Value *IdxCmp =
- Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
- NewIdx->addIncoming(NewIter, InsertTop);
- NewIdx->addIncoming(IdxSub, NewBB);
- }
- LatchBR->eraseFromParent();
- }
- }
-
- // Change the incoming values to the ones defined in the preheader or
- // cloned loop.
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (!CreateRemainderLoop) {
- if (UseEpilogRemainder) {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- NewPHI->removeIncomingValue(Latch, false);
- } else {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
- }
- } else {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
- idx = NewPHI->getBasicBlockIndex(Latch);
- Value *InVal = NewPHI->getIncomingValue(idx);
- NewPHI->setIncomingBlock(idx, NewLatch);
- if (Value *V = VMap.lookup(InVal))
- NewPHI->setIncomingValue(idx, V);
- }
- }
- if (CreateRemainderLoop) {
- Loop *NewLoop = NewLoops[L];
- MDNode *LoopID = NewLoop->getLoopID();
- assert(NewLoop && "L should have been cloned");
-
- // Only add loop metadata if the loop is not going to be completely
- // unrolled.
- if (UnrollRemainder)
- return NewLoop;
-
- Optional<MDNode *> NewLoopID = makeFollowupLoopID(
- LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID.hasValue()) {
- NewLoop->setLoopID(NewLoopID.getValue());
-
- // Do not setLoopAlreadyUnrolled if loop attributes have been defined
- // explicitly.
- return NewLoop;
- }
-
- // Add unroll disable metadata to disable future unrolling for this loop.
- NewLoop->setLoopAlreadyUnrolled();
- return NewLoop;
- }
- else
- return nullptr;
-}
-
-/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
-/// is populated with all the loop exit blocks other than the LatchExit block.
-static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
- bool PreserveLCSSA,
- bool UseEpilogRemainder) {
-
- // We currently have some correctness constrains in unrolling a multi-exit
- // loop. Check for these below.
-
- // We rely on LCSSA form being preserved when the exit blocks are transformed.
- if (!PreserveLCSSA)
- return false;
-
- // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
- // UnrollRuntimeMultiExit is true. This will need updating the logic in
- // connectEpilog/connectProlog.
- if (!LatchExit->getSinglePredecessor()) {
- LLVM_DEBUG(
- dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
- "predecessor.\n");
- return false;
- }
- // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
- // and L is an inner loop. This is because in presence of multiple exits, the
- // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
- // outer loop. This is automatically handled in the prolog case, so we do not
- // have that bug in prolog generation.
- if (UseEpilogRemainder && L->getParentLoop())
- return false;
-
- // All constraints have been satisfied.
- return true;
-}
-
-/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
-/// we return true only if UnrollRuntimeMultiExit is set to true.
-static bool canProfitablyUnrollMultiExitLoop(
- Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
- bool PreserveLCSSA, bool UseEpilogRemainder) {
-
-#if !defined(NDEBUG)
- assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- "Should be safe to unroll before checking profitability!");
-#endif
-
- // Priority goes to UnrollRuntimeMultiExit if it's supplied.
- if (UnrollRuntimeMultiExit.getNumOccurrences())
- return UnrollRuntimeMultiExit;
-
- // The main pain point with multi-exit loop unrolling is that once unrolled,
- // we will not be able to merge all blocks into a straight line code.
- // There are branches within the unrolled loop that go to the OtherExits.
- // The second point is the increase in code size, but this is true
- // irrespective of multiple exits.
-
- // Note: Both the heuristics below are coarse grained. We are essentially
- // enabling unrolling of loops that have a single side exit other than the
- // normal LatchExit (i.e. exiting into a deoptimize block).
- // The heuristics considered are:
- // 1. low number of branches in the unrolled version.
- // 2. high predictability of these extra branches.
- // We avoid unrolling loops that have more than two exiting blocks. This
- // limits the total number of branches in the unrolled loop to be atmost
- // the unroll factor (since one of the exiting blocks is the latch block).
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- if (ExitingBlocks.size() > 2)
- return false;
-
- // The second heuristic is that L has one exit other than the latchexit and
- // that exit is a deoptimize block. We know that deoptimize blocks are rarely
- // taken, which also implies the branch leading to the deoptimize block is
- // highly predictable.
- return (OtherExits.size() == 1 &&
- OtherExits[0]->getTerminatingDeoptimizeCall());
- // TODO: These can be fine-tuned further to consider code size or deopt states
- // that are captured by the deoptimize exit block.
- // Also, we can extend this to support more cases, if we actually
- // know of kinds of multiexit loops that would benefit from unrolling.
-}
-
-/// Insert code in the prolog/epilog code when unrolling a loop with a
-/// run-time trip-count.
-///
-/// This method assumes that the loop unroll factor is total number
-/// of loop bodies in the loop after unrolling. (Some folks refer
-/// to the unroll factor as the number of *extra* copies added).
-/// We assume also that the loop unroll factor is a power-of-two. So, after
-/// unrolling the loop, the number of loop bodies executed is 2,
-/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
-/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
-/// the switch instruction is generated.
-///
-/// ***Prolog case***
-/// extraiters = tripcount % loopfactor
-/// if (extraiters == 0) jump Loop:
-/// else jump Prol:
-/// Prol: LoopBody;
-/// extraiters -= 1 // Omitted if unroll factor is 2.
-/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
-/// if (tripcount < loopfactor) jump End:
-/// Loop:
-/// ...
-/// End:
-///
-/// ***Epilog case***
-/// extraiters = tripcount % loopfactor
-/// if (tripcount < loopfactor) jump LoopExit:
-/// unroll_iters = tripcount - extraiters
-/// Loop: LoopBody; (executes unroll_iter times);
-/// unroll_iter -= 1
-/// if (unroll_iter != 0) jump Loop:
-/// LoopExit:
-/// if (extraiters == 0) jump EpilExit:
-/// Epil: LoopBody; (executes extraiters times)
-/// extraiters -= 1 // Omitted if unroll factor is 2.
-/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
-/// EpilExit:
-
-bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
- bool AllowExpensiveTripCount,
- bool UseEpilogRemainder,
- bool UnrollRemainder, bool ForgetAllSCEV,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA, Loop **ResultLoop) {
- LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
- LLVM_DEBUG(L->dump());
- LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
- : dbgs() << "Using prolog remainder.\n");
-
- // Make sure the loop is in canonical form.
- if (!L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
- return false;
- }
-
- // Guaranteed by LoopSimplifyForm.
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
-
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-
- if (!LatchBR || LatchBR->isUnconditional()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
- LLVM_DEBUG(
- dbgs()
- << "Loop latch not terminated by a conditional branch.\n");
- return false;
- }
-
- unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
- BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
-
- if (L->contains(LatchExit)) {
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be an exit block out of the loop.
- LLVM_DEBUG(
- dbgs()
- << "One of the loop latch successors must be the exit block.\n");
- return false;
- }
-
- // These are exit blocks other than the target of the latch exiting block.
- SmallVector<BasicBlock *, 4> OtherExits;
- L->getUniqueNonLatchExitBlocks(OtherExits);
- bool isMultiExitUnrollingEnabled =
- canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
- UseEpilogRemainder);
- // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
- if (!isMultiExitUnrollingEnabled &&
- (!L->getExitingBlock() || OtherExits.size())) {
- LLVM_DEBUG(
- dbgs()
- << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
- "enabled!\n");
- return false;
- }
- // Use Scalar Evolution to compute the trip count. This allows more loops to
- // be unrolled than relying on induction var simplification.
- if (!SE)
- return false;
-
- // Only unroll loops with a computable trip count, and the trip count needs
- // to be an int value (allowing a pointer type is a TODO item).
- // We calculate the backedge count by using getExitCount on the Latch block,
- // which is proven to be the only exiting block in this loop. This is same as
- // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
- // exiting blocks).
- const SCEV *BECountSC = SE->getExitCount(L, Latch);
- if (isa<SCEVCouldNotCompute>(BECountSC) ||
- !BECountSC->getType()->isIntegerTy()) {
- LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
- return false;
- }
-
- unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
-
- // Add 1 since the backedge count doesn't include the first loop iteration.
- const SCEV *TripCountSC =
- SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
- if (isa<SCEVCouldNotCompute>(TripCountSC)) {
- LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
- return false;
- }
-
- BasicBlock *PreHeader = L->getLoopPreheader();
- BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
- const DataLayout &DL = Header->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "loop-unroll");
- if (!AllowExpensiveTripCount &&
- Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
- LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
- return false;
- }
-
- // This constraint lets us deal with an overflowing trip count easily; see the
- // comment on ModVal below.
- if (Log2_32(Count) > BEWidth) {
- LLVM_DEBUG(
- dbgs()
- << "Count failed constraint on overflow trip count calculation.\n");
- return false;
- }
-
- // Loop structure is the following:
- //
- // PreHeader
- // Header
- // ...
- // Latch
- // LatchExit
-
- BasicBlock *NewPreHeader;
- BasicBlock *NewExit = nullptr;
- BasicBlock *PrologExit = nullptr;
- BasicBlock *EpilogPreHeader = nullptr;
- BasicBlock *PrologPreHeader = nullptr;
-
- if (UseEpilogRemainder) {
- // If epilog remainder
- // Split PreHeader to insert a branch around loop for unrolling.
- NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
- NewPreHeader->setName(PreHeader->getName() + ".new");
- // Split LatchExit to create phi nodes from branch above.
- SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
- nullptr, PreserveLCSSA);
- // NewExit gets its DebugLoc from LatchExit, which is not part of the
- // original Loop.
- // Fix this by setting Loop's DebugLoc to NewExit.
- auto *NewExitTerminator = NewExit->getTerminator();
- NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
- // Split NewExit to insert epilog remainder loop.
- EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
- EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
- } else {
- // If prolog remainder
- // Split the original preheader twice to insert prolog remainder loop
- PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
- PrologPreHeader->setName(Header->getName() + ".prol.preheader");
- PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
- DT, LI);
- PrologExit->setName(Header->getName() + ".prol.loopexit");
- // Split PrologExit to get NewPreHeader.
- NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
- NewPreHeader->setName(PreHeader->getName() + ".new");
- }
- // Loop structure should be the following:
- // Epilog Prolog
- //
- // PreHeader PreHeader
- // *NewPreHeader *PrologPreHeader
- // Header *PrologExit
- // ... *NewPreHeader
- // Latch Header
- // *NewExit ...
- // *EpilogPreHeader Latch
- // LatchExit LatchExit
-
- // Calculate conditions for branch around loop for unrolling
- // in epilog case and around prolog remainder loop in prolog case.
- // Compute the number of extra iterations required, which is:
- // extra iterations = run-time trip count % loop unroll factor
- PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
- Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
- PreHeaderBR);
- Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
- PreHeaderBR);
- IRBuilder<> B(PreHeaderBR);
- Value *ModVal;
- // Calculate ModVal = (BECount + 1) % Count.
- // Note that TripCount is BECount + 1.
- if (isPowerOf2_32(Count)) {
- // When Count is power of 2 we don't BECount for epilog case, however we'll
- // need it for a branch around unrolling loop for prolog case.
- ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // 1. There are no iterations to be run in the prolog/epilog loop.
- // OR
- // 2. The addition computing TripCount overflowed.
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
- // the number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
- } else {
- // As (BECount + 1) can potentially unsigned overflow we count
- // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
- Value *ModValTmp = B.CreateURem(BECount,
- ConstantInt::get(BECount->getType(),
- Count));
- Value *ModValAdd = B.CreateAdd(ModValTmp,
- ConstantInt::get(ModValTmp->getType(), 1));
- // At that point (BECount % Count) + 1 could be equal to Count.
- // To handle this case we need to take mod by Count one more time.
- ModVal = B.CreateURem(ModValAdd,
- ConstantInt::get(BECount->getType(), Count),
- "xtraiter");
- }
- Value *BranchVal =
- UseEpilogRemainder ? B.CreateICmpULT(BECount,
- ConstantInt::get(BECount->getType(),
- Count - 1)) :
- B.CreateIsNotNull(ModVal, "lcmp.mod");
- BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
- BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
- // Branch to either remainder (extra iterations) loop or unrolling loop.
- B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
- PreHeaderBR->eraseFromParent();
- if (DT) {
- if (UseEpilogRemainder)
- DT->changeImmediateDominator(NewExit, PreHeader);
- else
- DT->changeImmediateDominator(PrologExit, PreHeader);
- }
- Function *F = Header->getParent();
- // Get an ordered list of blocks in the loop to help with the ordering of the
- // cloned blocks in the prolog/epilog code
- LoopBlocksDFS LoopBlocks(L);
- LoopBlocks.perform(LI);
-
- //
- // For each extra loop iteration, create a copy of the loop's basic blocks
- // and generate a condition that branches to the copy depending on the
- // number of 'left over' iterations.
- //
- std::vector<BasicBlock *> NewBlocks;
- ValueToValueMapTy VMap;
-
- // For unroll factor 2 remainder loop will have 1 iterations.
- // Do not create 1 iteration loop.
- bool CreateRemainderLoop = (Count != 2);
-
- // Clone all the basic blocks in the loop. If Count is 2, we don't clone
- // the loop, otherwise we create a cloned loop to execute the extra
- // iterations. This function adds the appropriate CFG connections.
- BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
- BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
- InsertTop, InsertBot,
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
-
- // Insert the cloned blocks into the function.
- F->getBasicBlockList().splice(InsertBot->getIterator(),
- F->getBasicBlockList(),
- NewBlocks[0]->getIterator(),
- F->end());
-
- // Now the loop blocks are cloned and the other exiting blocks from the
- // remainder are connected to the original Loop's exit blocks. The remaining
- // work is to update the phi nodes in the original loop, and take in the
- // values from the cloned region.
- for (auto *BB : OtherExits) {
- for (auto &II : *BB) {
-
- // Given we preserve LCSSA form, we know that the values used outside the
- // loop will be used through these phi nodes at the exit blocks that are
- // transformed below.
- if (!isa<PHINode>(II))
- break;
- PHINode *Phi = cast<PHINode>(&II);
- unsigned oldNumOperands = Phi->getNumIncomingValues();
- // Add the incoming values from the remainder code to the end of the phi
- // node.
- for (unsigned i =0; i < oldNumOperands; i++){
- Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
- // newVal can be a constant or derived from values outside the loop, and
- // hence need not have a VMap value. Also, since lookup already generated
- // a default "null" VMap entry for this value, we need to populate that
- // VMap entry correctly, with the mapped entry being itself.
- if (!newVal) {
- newVal = Phi->getIncomingValue(i);
- VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
- }
- Phi->addIncoming(newVal,
- cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
- }
- }
-#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- for (BasicBlock *SuccBB : successors(BB)) {
- assert(!(any_of(OtherExits,
- [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
- SuccBB == LatchExit) &&
- "Breaks the definition of dedicated exits!");
- }
-#endif
- }
-
- // Update the immediate dominator of the exit blocks and blocks that are
- // reachable from the exit blocks. This is needed because we now have paths
- // from both the original loop and the remainder code reaching the exit
- // blocks. While the IDom of these exit blocks were from the original loop,
- // now the IDom is the preheader (which decides whether the original loop or
- // remainder code should run).
- if (DT && !L->getExitingBlock()) {
- SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- // NB! We have to examine the dom children of all loop blocks, not just
- // those which are the IDom of the exit blocks. This is because blocks
- // reachable from the exit blocks can have their IDom as the nearest common
- // dominator of the exit blocks.
- for (auto *BB : L->blocks()) {
- auto *DomNodeBB = DT->getNode(BB);
- for (auto *DomChild : DomNodeBB->getChildren()) {
- auto *DomChildBB = DomChild->getBlock();
- if (!L->contains(LI->getLoopFor(DomChildBB)))
- ChildrenToUpdate.push_back(DomChildBB);
- }
- }
- for (auto *BB : ChildrenToUpdate)
- DT->changeImmediateDominator(BB, PreHeader);
- }
-
- // Loop structure should be the following:
- // Epilog Prolog
- //
- // PreHeader PreHeader
- // NewPreHeader PrologPreHeader
- // Header PrologHeader
- // ... ...
- // Latch PrologLatch
- // NewExit PrologExit
- // EpilogPreHeader NewPreHeader
- // EpilogHeader Header
- // ... ...
- // EpilogLatch Latch
- // LatchExit LatchExit
-
- // Rewrite the cloned instruction operands to use the values created when the
- // clone is created.
- for (BasicBlock *BB : NewBlocks) {
- for (Instruction &I : *BB) {
- RemapInstruction(&I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- }
- }
-
- if (UseEpilogRemainder) {
- // Connect the epilog code to the original loop and update the
- // PHI functions.
- ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
- EpilogPreHeader, NewPreHeader, VMap, DT, LI,
- PreserveLCSSA);
-
- // Update counter in loop for unrolling.
- // I should be multiply of Count.
- IRBuilder<> B2(NewPreHeader->getTerminator());
- Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- B2.SetInsertPoint(LatchBR);
- PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
- Header->getFirstNonPHI());
- Value *IdxSub =
- B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".nsub");
- Value *IdxCmp;
- if (LatchBR->getSuccessor(0) == Header)
- IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
- else
- IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
- NewIdx->addIncoming(TestVal, NewPreHeader);
- NewIdx->addIncoming(IdxSub, Latch);
- LatchBR->setCondition(IdxCmp);
- } else {
- // Connect the prolog code to the original loop and update the
- // PHI functions.
- ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA);
- }
-
- // If this loop is nested, then the loop unroller changes the code in the any
- // of its parent loops, so the Scalar Evolution pass needs to be run again.
- SE->forgetTopmostLoop(L);
-
- // Verify that the Dom Tree is correct.
-#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- if (DT)
- assert(DT->verify(DominatorTree::VerificationLevel::Full));
-#endif
-
- // Canonicalize to LoopSimplifyForm both original and remainder loops. We
- // cannot rely on the LoopUnrollPass to do this because it only does
- // canonicalization for parent/subloops and not the sibling loops.
- if (OtherExits.size() > 0) {
- // Generate dedicated exit blocks for the original loop, to preserve
- // LoopSimplifyForm.
- formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
- // Generate dedicated exit blocks for the remainder loop if one exists, to
- // preserve LoopSimplifyForm.
- if (remainderLoop)
- formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
- }
-
- auto UnrollResult = LoopUnrollResult::Unmodified;
- if (remainderLoop && UnrollRemainder) {
- LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollResult =
- UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
- }
-
- if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
- *ResultLoop = remainderLoop;
- NumRuntimeUnrolled++;
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
deleted file mode 100644
index ec226e65f650..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ /dev/null
@@ -1,976 +0,0 @@
-//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines common loop utility functions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-#define DEBUG_TYPE "loop-utils"
-
-static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
-
-bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- bool Changed = false;
-
- // We re-use a vector for the in-loop predecesosrs.
- SmallVector<BasicBlock *, 4> InLoopPredecessors;
-
- auto RewriteExit = [&](BasicBlock *BB) {
- assert(InLoopPredecessors.empty() &&
- "Must start with an empty predecessors list!");
- auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
-
- // See if there are any non-loop predecessors of this exit block and
- // keep track of the in-loop predecessors.
- bool IsDedicatedExit = true;
- for (auto *PredBB : predecessors(BB))
- if (L->contains(PredBB)) {
- if (isa<IndirectBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from an indirectbr.
- return false;
- if (isa<CallBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from a callbr.
- return false;
-
- InLoopPredecessors.push_back(PredBB);
- } else {
- IsDedicatedExit = false;
- }
-
- assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
-
- // Nothing to do if this is already a dedicated exit.
- if (IsDedicatedExit)
- return false;
-
- auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
-
- if (!NewExitBB)
- LLVM_DEBUG(
- dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
- << *L << "\n");
- else
- LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewExitBB->getName() << "\n");
- return true;
- };
-
- // Walk the exit blocks directly rather than building up a data structure for
- // them, but only visit each one once.
- SmallPtrSet<BasicBlock *, 4> Visited;
- for (auto *BB : L->blocks())
- for (auto *SuccBB : successors(BB)) {
- // We're looking for exit blocks so skip in-loop successors.
- if (L->contains(SuccBB))
- continue;
-
- // Visit each exit block exactly once.
- if (!Visited.insert(SuccBB).second)
- continue;
-
- Changed |= RewriteExit(SuccBB);
- }
-
- return Changed;
-}
-
-/// Returns the instructions that use values defined in the loop.
-SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
- SmallVector<Instruction *, 8> UsedOutside;
-
- for (auto *Block : L->getBlocks())
- // FIXME: I believe that this could use copy_if if the Inst reference could
- // be adapted into a pointer.
- for (auto &Inst : *Block) {
- auto Users = Inst.users();
- if (any_of(Users, [&](User *U) {
- auto *Use = cast<Instruction>(U);
- return !L->contains(Use->getParent());
- }))
- UsedOutside.push_back(&Inst);
- }
-
- return UsedOutside;
-}
-
-void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
- // By definition, all loop passes need the LoopInfo analysis and the
- // Dominator tree it depends on. Because they all participate in the loop
- // pass manager, they must also preserve these.
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
-
- // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
- // here because users shouldn't directly get them from this header.
- extern char &LoopSimplifyID;
- extern char &LCSSAID;
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- // This is used in the LPPassManager to perform LCSSA verification on passes
- // which preserve lcssa form
- AU.addRequired<LCSSAVerificationPass>();
- AU.addPreserved<LCSSAVerificationPass>();
-
- // Loop passes are designed to run inside of a loop pass manager which means
- // that any function analyses they require must be required by the first loop
- // pass in the manager (so that it is computed before the loop pass manager
- // runs) and preserved by all loop pasess in the manager. To make this
- // reasonably robust, the set needed for most loop passes is maintained here.
- // If your loop pass requires an analysis not listed here, you will need to
- // carefully audit the loop pass manager nesting structure that results.
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
-}
-
-/// Manually defined generic "LoopPass" dependency initialization. This is used
-/// to initialize the exact set of passes from above in \c
-/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
-/// with:
-///
-/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
-///
-/// As-if "LoopPass" were a pass.
-void llvm::initializeLoopPassPass(PassRegistry &Registry) {
- INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
- INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-}
-
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise. If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- // When the value is absent it is interpreted as 'attribute set'.
- return true;
- case 2:
- if (ConstantInt *IntMD =
- mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
- return IntMD->getZExtValue();
- return true;
- }
- llvm_unreachable("unexpected number of options");
-}
-
-static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
- return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
- StringRef Name) {
- const MDOperand *AttrMD =
- findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
- if (!AttrMD)
- return None;
-
- ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
- if (!IntMD)
- return None;
-
- return IntMD->getSExtValue();
-}
-
-Optional<MDNode *> llvm::makeFollowupLoopID(
- MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
- const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
- if (!OrigLoopID) {
- if (AlwaysNew)
- return nullptr;
- return None;
- }
-
- assert(OrigLoopID->getOperand(0) == OrigLoopID);
-
- bool InheritAllAttrs = !InheritOptionsExceptPrefix;
- bool InheritSomeAttrs =
- InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
- SmallVector<Metadata *, 8> MDs;
- MDs.push_back(nullptr);
-
- bool Changed = false;
- if (InheritAllAttrs || InheritSomeAttrs) {
- for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
- MDNode *Op = cast<MDNode>(Existing.get());
-
- auto InheritThisAttribute = [InheritSomeAttrs,
- InheritOptionsExceptPrefix](MDNode *Op) {
- if (!InheritSomeAttrs)
- return false;
-
- // Skip malformatted attribute metadata nodes.
- if (Op->getNumOperands() == 0)
- return true;
- Metadata *NameMD = Op->getOperand(0).get();
- if (!isa<MDString>(NameMD))
- return true;
- StringRef AttrName = cast<MDString>(NameMD)->getString();
-
- // Do not inherit excluded attributes.
- return !AttrName.startswith(InheritOptionsExceptPrefix);
- };
-
- if (InheritThisAttribute(Op))
- MDs.push_back(Op);
- else
- Changed = true;
- }
- } else {
- // Modified if we dropped at least one attribute.
- Changed = OrigLoopID->getNumOperands() > 1;
- }
-
- bool HasAnyFollowup = false;
- for (StringRef OptionName : FollowupOptions) {
- MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
- if (!FollowupNode)
- continue;
-
- HasAnyFollowup = true;
- for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
- MDs.push_back(Option.get());
- Changed = true;
- }
- }
-
- // Attributes of the followup loop not specified explicity, so signal to the
- // transformation pass to add suitable attributes.
- if (!AlwaysNew && !HasAnyFollowup)
- return None;
-
- // If no attributes were added or remove, the previous loop Id can be reused.
- if (!AlwaysNew && !Changed)
- return OrigLoopID;
-
- // No attributes is equivalent to having no !llvm.loop metadata at all.
- if (MDs.size() == 1)
- return nullptr;
-
- // Build the new loop ID.
- MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
- FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
- return FollowupLoopID;
-}
-
-bool llvm::hasDisableAllTransformsHint(const Loop *L) {
- return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
-}
-
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
- if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
- return TM_SuppressedByUser;
-
- Optional<int> Count =
- getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
- if (Count.hasValue())
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
-
- if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
- return TM_ForcedByUser;
-
- if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
- return TM_ForcedByUser;
-
- if (hasDisableAllTransformsHint(L))
- return TM_Disable;
-
- return TM_Unspecified;
-}
-
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
- if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
- return TM_SuppressedByUser;
-
- Optional<int> Count =
- getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
- if (Count.hasValue())
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
-
- if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
- return TM_ForcedByUser;
-
- if (hasDisableAllTransformsHint(L))
- return TM_Disable;
-
- return TM_Unspecified;
-}
-
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
- Optional<bool> Enable =
- getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
-
- if (Enable == false)
- return TM_SuppressedByUser;
-
- Optional<int> VectorizeWidth =
- getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
- Optional<int> InterleaveCount =
- getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
-
- // 'Forcing' vector width and interleave count to one effectively disables
- // this tranformation.
- if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
- return TM_SuppressedByUser;
-
- if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
- return TM_Disable;
-
- if (Enable == true)
- return TM_ForcedByUser;
-
- if (VectorizeWidth == 1 && InterleaveCount == 1)
- return TM_Disable;
-
- if (VectorizeWidth > 1 || InterleaveCount > 1)
- return TM_Enable;
-
- if (hasDisableAllTransformsHint(L))
- return TM_Disable;
-
- return TM_Unspecified;
-}
-
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
- if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
- return TM_ForcedByUser;
-
- if (hasDisableAllTransformsHint(L))
- return TM_Disable;
-
- return TM_Unspecified;
-}
-
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
- if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
- return TM_SuppressedByUser;
-
- if (hasDisableAllTransformsHint(L))
- return TM_Disable;
-
- return TM_Unspecified;
-}
-
-/// Does a BFS from a given node to all of its children inside a given loop.
-/// The returned vector of nodes includes the starting point.
-SmallVector<DomTreeNode *, 16>
-llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
- SmallVector<DomTreeNode *, 16> Worklist;
- auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
- // Only include subregions in the top level loop.
- BasicBlock *BB = DTN->getBlock();
- if (CurLoop->contains(BB))
- Worklist.push_back(DTN);
- };
-
- AddRegionToWorklist(N);
-
- for (size_t I = 0; I < Worklist.size(); I++)
- for (DomTreeNode *Child : Worklist[I]->getChildren())
- AddRegionToWorklist(Child);
-
- return Worklist;
-}
-
-void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
- ScalarEvolution *SE = nullptr,
- LoopInfo *LI = nullptr) {
- assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
- auto *Preheader = L->getLoopPreheader();
- assert(Preheader && "Preheader should exist!");
-
- // Now that we know the removal is safe, remove the loop by changing the
- // branch from the preheader to go to the single exit block.
- //
- // Because we're deleting a large chunk of code at once, the sequence in which
- // we remove things is very important to avoid invalidation issues.
-
- // Tell ScalarEvolution that the loop is deleted. Do this before
- // deleting the loop so that ScalarEvolution can look at the loop
- // to determine what it needs to clean up.
- if (SE)
- SE->forgetLoop(L);
-
- auto *ExitBlock = L->getUniqueExitBlock();
- assert(ExitBlock && "Should have a unique exit block!");
- assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
-
- auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
- assert(OldBr && "Preheader must end with a branch");
- assert(OldBr->isUnconditional() && "Preheader must have a single successor");
- // Connect the preheader to the exit block. Keep the old edge to the header
- // around to perform the dominator tree update in two separate steps
- // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
- // preheader -> header.
- //
- //
- // 0. Preheader 1. Preheader 2. Preheader
- // | | | |
- // V | V |
- // Header <--\ | Header <--\ | Header <--\
- // | | | | | | | | | | |
- // | V | | | V | | | V |
- // | Body --/ | | Body --/ | | Body --/
- // V V V V V
- // Exit Exit Exit
- //
- // By doing this is two separate steps we can perform the dominator tree
- // update without using the batch update API.
- //
- // Even when the loop is never executed, we cannot remove the edge from the
- // source block to the exit block. Consider the case where the unexecuted loop
- // branches back to an outer loop. If we deleted the loop and removed the edge
- // coming to this inner loop, this will break the outer loop structure (by
- // deleting the backedge of the outer loop). If the outer loop is indeed a
- // non-loop, it will be deleted in a future iteration of loop deletion pass.
- IRBuilder<> Builder(OldBr);
- Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
- // Remove the old branch. The conditional branch becomes a new terminator.
- OldBr->eraseFromParent();
-
- // Rewrite phis in the exit block to get their inputs from the Preheader
- // instead of the exiting block.
- for (PHINode &P : ExitBlock->phis()) {
- // Set the zero'th element of Phi to be from the preheader and remove all
- // other incoming values. Given the loop has dedicated exits, all other
- // incoming values must be from the exiting blocks.
- int PredIndex = 0;
- P.setIncomingBlock(PredIndex, Preheader);
- // Removes all incoming values from all other exiting blocks (including
- // duplicate values from an exiting block).
- // Nuke all entries except the zero'th entry which is the preheader entry.
- // NOTE! We need to remove Incoming Values in the reverse order as done
- // below, to keep the indices valid for deletion (removeIncomingValues
- // updates getNumIncomingValues and shifts all values down into the operand
- // being deleted).
- for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
- P.removeIncomingValue(e - i, false);
-
- assert((P.getNumIncomingValues() == 1 &&
- P.getIncomingBlock(PredIndex) == Preheader) &&
- "Should have exactly one value and that's from the preheader!");
- }
-
- // Disconnect the loop body by branching directly to its exit.
- Builder.SetInsertPoint(Preheader->getTerminator());
- Builder.CreateBr(ExitBlock);
- // Remove the old branch.
- Preheader->getTerminator()->eraseFromParent();
-
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- if (DT) {
- // Update the dominator tree by informing it about the new edge from the
- // preheader to the exit and the removed edge.
- DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
- {DominatorTree::Delete, Preheader, L->getHeader()}});
- }
-
- // Use a map to unique and a vector to guarantee deterministic ordering.
- llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
- llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
-
- // Given LCSSA form is satisfied, we should not have users of instructions
- // within the dead loop outside of the loop. However, LCSSA doesn't take
- // unreachable uses into account. We handle them here.
- // We could do it after drop all references (in this case all users in the
- // loop will be already eliminated and we have less work to do but according
- // to API doc of User::dropAllReferences only valid operation after dropping
- // references, is deletion. So let's substitute all usages of
- // instruction from the loop with undef value of corresponding type first.
- for (auto *Block : L->blocks())
- for (Instruction &I : *Block) {
- auto *Undef = UndefValue::get(I.getType());
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
- if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
- if (L->contains(Usr->getParent()))
- continue;
- // If we have a DT then we can check that uses outside a loop only in
- // unreachable block.
- if (DT)
- assert(!DT->isReachableFromEntry(U) &&
- "Unexpected user in reachable block");
- U.set(Undef);
- }
- auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
- if (!DVI)
- continue;
- auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
- if (Key != DeadDebugSet.end())
- continue;
- DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
- DeadDebugInst.push_back(DVI);
- }
-
- // After the loop has been deleted all the values defined and modified
- // inside the loop are going to be unavailable.
- // Since debug values in the loop have been deleted, inserting an undef
- // dbg.value truncates the range of any dbg.value before the loop where the
- // loop used to be. This is particularly important for constant values.
- DIBuilder DIB(*ExitBlock->getModule());
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
- "There should be a non-PHI instruction in exit block, else these "
- "instructions will have no parent.");
- for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
- DVI->getVariable(), DVI->getExpression(),
- DVI->getDebugLoc(), InsertDbgValueBefore);
-
- // Remove the block from the reference counting scheme, so that we can
- // delete it freely later.
- for (auto *Block : L->blocks())
- Block->dropAllReferences();
-
- if (LI) {
- // Erase the instructions and the blocks without having to worry
- // about ordering because we already dropped the references.
- // NOTE: This iteration is safe because erasing the block does not remove
- // its entry from the loop's block list. We do that in the next section.
- for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
- LpI != LpE; ++LpI)
- (*LpI)->eraseFromParent();
-
- // Finally, the blocks from loopinfo. This has to happen late because
- // otherwise our loop iterators won't work.
-
- SmallPtrSet<BasicBlock *, 8> blocks;
- blocks.insert(L->block_begin(), L->block_end());
- for (BasicBlock *BB : blocks)
- LI->removeBlock(BB);
-
- // The last step is to update LoopInfo now that we've eliminated this loop.
- LI->erase(L);
- }
-}
-
-Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
- // Support loops with an exiting latch and other existing exists only
- // deoptimize.
-
- // Get the branch weights for the loop's backedge.
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
- return None;
- BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
- return None;
-
- assert((LatchBR->getSuccessor(0) == L->getHeader() ||
- LatchBR->getSuccessor(1) == L->getHeader()) &&
- "At least one edge out of the latch must go to the header");
-
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getUniqueNonLatchExitBlocks(ExitBlocks);
- if (any_of(ExitBlocks, [](const BasicBlock *EB) {
- return !EB->getTerminatingDeoptimizeCall();
- }))
- return None;
-
- // To estimate the number of times the loop body was executed, we want to
- // know the number of times the backedge was taken, vs. the number of times
- // we exited the loop.
- uint64_t TrueVal, FalseVal;
- if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
- return None;
-
- if (!TrueVal || !FalseVal)
- return 0;
-
- // Divide the count of the backedge by the count of the edge exiting the loop,
- // rounding to nearest.
- if (LatchBR->getSuccessor(0) == L->getHeader())
- return (TrueVal + (FalseVal / 2)) / FalseVal;
- else
- return (FalseVal + (TrueVal / 2)) / TrueVal;
-}
-
-bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
- ScalarEvolution &SE) {
- Loop *OuterL = InnerLoop->getParentLoop();
- if (!OuterL)
- return true;
-
- // Get the backedge taken count for the inner loop
- BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
- const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
- if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
- !InnerLoopBECountSC->getType()->isIntegerTy())
- return false;
-
- // Get whether count is invariant to the outer loop
- ScalarEvolution::LoopDisposition LD =
- SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
- if (LD != ScalarEvolution::LoopInvariant)
- return false;
-
- return true;
-}
-
-Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
- RecurrenceDescriptor::MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max recurrence kind");
- case RecurrenceDescriptor::MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case RecurrenceDescriptor::MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case RecurrenceDescriptor::MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case RecurrenceDescriptor::MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case RecurrenceDescriptor::MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case RecurrenceDescriptor::MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
-
- Value *Cmp;
- if (RK == RecurrenceDescriptor::MRK_FloatMin ||
- RK == RecurrenceDescriptor::MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
-// Helper to generate an ordered reduction.
-Value *
-llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
- unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
- unsigned VF = Src->getType()->getVectorNumElements();
-
- // Extract and apply reduction ops in ascending order:
- // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
- Value *Result = Acc;
- for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
- Value *Ext =
- Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
-
- if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
- Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
- "bin.rdx");
- } else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
- "Invalid min/max");
- Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
- }
-
- if (!RedOps.empty())
- propagateIRFlags(Result, RedOps);
- }
-
- return Result;
-}
-
-// Helper to generate a log2 shuffle reduction.
-Value *
-llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
- ArrayRef<Value *> RedOps) {
- unsigned VF = Src->getType()->getVectorNumElements();
- // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
- // and vector ops, reducing the set of values being computed by half each
- // round.
- assert(isPowerOf2_32(VF) &&
- "Reduction emission only supported for pow2 vectors!");
- Value *TmpVec = Src;
- SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
- for (unsigned i = VF; i != 1; i >>= 1) {
- // Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i / 2; ++j)
- ShuffleMask[j] = Builder.getInt32(i / 2 + j);
-
- // Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
-
- Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()),
- ConstantVector::get(ShuffleMask), "rdx.shuf");
-
- if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
- // The builder propagates its fast-math-flags setting.
- TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
- "bin.rdx");
- } else {
- assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
- "Invalid min/max");
- TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
- }
- if (!RedOps.empty())
- propagateIRFlags(TmpVec, RedOps);
- }
- // The result is in the first element of the vector.
- return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
-}
-
-/// Create a simple vector reduction specified by an opcode and some
-/// flags (if generating min/max reductions).
-Value *llvm::createSimpleTargetReduction(
- IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
- Value *Src, TargetTransformInfo::ReductionFlags Flags,
- ArrayRef<Value *> RedOps) {
- assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
-
- std::function<Value *()> BuildFunc;
- using RD = RecurrenceDescriptor;
- RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-
- switch (Opcode) {
- case Instruction::Add:
- BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
- break;
- case Instruction::Mul:
- BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
- break;
- case Instruction::And:
- BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
- break;
- case Instruction::Or:
- BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
- break;
- case Instruction::Xor:
- BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
- break;
- case Instruction::FAdd:
- BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(
- Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
- return Rdx;
- };
- break;
- case Instruction::FMul:
- BuildFunc = [&]() {
- Type *Ty = Src->getType()->getVectorElementType();
- auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
- return Rdx;
- };
- break;
- case Instruction::ICmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
- BuildFunc = [&]() {
- return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
- };
- } else {
- MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
- BuildFunc = [&]() {
- return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
- };
- }
- break;
- case Instruction::FCmp:
- if (Flags.IsMaxOp) {
- MinMaxKind = RD::MRK_FloatMax;
- BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
- } else {
- MinMaxKind = RD::MRK_FloatMin;
- BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
- }
- break;
- default:
- llvm_unreachable("Unhandled opcode");
- break;
- }
- if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
- return BuildFunc();
- return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
-}
-
-/// Create a vector reduction using a given recurrence descriptor.
-Value *llvm::createTargetReduction(IRBuilder<> &B,
- const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src,
- bool NoNaN) {
- // TODO: Support in-order reductions based on the recurrence descriptor.
- using RD = RecurrenceDescriptor;
- RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
- TargetTransformInfo::ReductionFlags Flags;
- Flags.NoNaN = NoNaN;
-
- // All ops in the reduction inherit fast-math-flags from the recurrence
- // descriptor.
- IRBuilder<>::FastMathFlagGuard FMFGuard(B);
- B.setFastMathFlags(Desc.getFastMathFlags());
-
- switch (RecKind) {
- case RD::RK_FloatAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
- case RD::RK_FloatMult:
- return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
- case RD::RK_IntegerAdd:
- return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
- case RD::RK_IntegerMult:
- return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
- case RD::RK_IntegerAnd:
- return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
- case RD::RK_IntegerOr:
- return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
- case RD::RK_IntegerXor:
- return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
- case RD::RK_IntegerMinMax: {
- RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
- Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
- Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
- return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
- }
- case RD::RK_FloatMinMax: {
- Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
- return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
- }
- default:
- llvm_unreachable("Unhandled RecKind");
- }
-}
-
-void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
- auto *VecOp = dyn_cast<Instruction>(I);
- if (!VecOp)
- return;
- auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
- : dyn_cast<Instruction>(OpValue);
- if (!Intersection)
- return;
- const unsigned Opcode = Intersection->getOpcode();
- VecOp->copyIRFlags(Intersection);
- for (auto *V : VL) {
- auto *Instr = dyn_cast<Instruction>(V);
- if (!Instr)
- continue;
- if (OpValue == nullptr || Opcode == Instr->getOpcode())
- VecOp->andIRFlags(V);
- }
-}
-
-bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(S->getType());
- return SE.isAvailableAtLoopEntry(S, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
-}
-
-bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(S->getType());
- return SE.isAvailableAtLoopEntry(S, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
-}
-
-bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
- bool Signed) {
- unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
- APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- return SE.isAvailableAtLoopEntry(S, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, S,
- SE.getConstant(Min));
-}
-
-bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
- bool Signed) {
- unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
- APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- return SE.isAvailableAtLoopEntry(S, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, S,
- SE.getConstant(Max));
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
deleted file mode 100644
index a9a480a4b7f9..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a utility class to perform loop versioning. The versioned
-// loop speculates that otherwise may-aliasing memory accesses don't overlap and
-// emits checks to prove this.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopVersioning.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-using namespace llvm;
-
-static cl::opt<bool>
- AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
- cl::Hidden,
- cl::desc("Add no-alias annotation for instructions that "
- "are disambiguated by memchecks"));
-
-LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
- DominatorTree *DT, ScalarEvolution *SE,
- bool UseLAIChecks)
- : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
- SE(SE) {
- assert(L->getExitBlock() && "No single exit block");
- assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form");
- if (UseLAIChecks) {
- setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
- setSCEVChecks(LAI.getPSE().getUnionPredicate());
- }
-}
-
-void LoopVersioning::setAliasChecks(
- SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) {
- AliasChecks = std::move(Checks);
-}
-
-void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) {
- Preds = std::move(Check);
-}
-
-void LoopVersioning::versionLoop(
- const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
- Value *SCEVRuntimeCheck;
- Value *RuntimeCheck = nullptr;
-
- // Add the memcheck in the original preheader (this is empty initially).
- BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
-
- const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate();
- SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
- "scev.check");
- SCEVRuntimeCheck =
- Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator());
- auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
-
- // Discard the SCEV runtime check if it is always true.
- if (CI && CI->isZero())
- SCEVRuntimeCheck = nullptr;
-
- if (MemRuntimeCheck && SCEVRuntimeCheck) {
- RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
- SCEVRuntimeCheck, "lver.safe");
- if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
- I->insertBefore(RuntimeCheckBB->getTerminator());
- } else
- RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
-
- assert(RuntimeCheck && "called even though we don't need "
- "any runtime checks");
-
- // Rename the block to make the IR more readable.
- RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
- ".lver.check");
-
- // Create empty preheader for the loop (and after cloning for the
- // non-versioned loop).
- BasicBlock *PH =
- SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
- PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
-
- // Clone the loop including the preheader.
- //
- // FIXME: This does not currently preserve SimplifyLoop because the exit
- // block is a join between the two loops.
- SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
- NonVersionedLoop =
- cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
- ".lver.orig", LI, DT, NonVersionedLoopBlocks);
- remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
-
- // Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
- BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
- OrigTerm->eraseFromParent();
-
- // The loops merge in the original exit block. This is now dominated by the
- // memchecking block.
- DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
-
- // Adds the necessary PHI nodes for the versioned loops based on the
- // loop-defined values used outside of the loop.
- addPHINodes(DefsUsedOutside);
-}
-
-void LoopVersioning::addPHINodes(
- const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
- assert(PHIBlock && "No single successor to loop exit block");
- PHINode *PN;
-
- // First add a single-operand PHI for each DefsUsedOutside if one does not
- // exists yet.
- for (auto *Inst : DefsUsedOutside) {
- // See if we have a single-operand PHI with the value defined by the
- // original loop.
- for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- if (PN->getIncomingValue(0) == Inst)
- break;
- }
- // If not create it.
- if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- &PHIBlock->front());
- SmallVector<User*, 8> UsersToUpdate;
- for (User *U : Inst->users())
- if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
- UsersToUpdate.push_back(U);
- for (User *U : UsersToUpdate)
- U->replaceUsesOfWith(Inst, PN);
- PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
- }
- }
-
- // Then for each PHI add the operand for the edge from the cloned loop.
- for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have on predecessor");
-
- // If the definition was cloned used that otherwise use the same value.
- Value *ClonedValue = PN->getIncomingValue(0);
- auto Mapped = VMap.find(ClonedValue);
- if (Mapped != VMap.end())
- ClonedValue = Mapped->second;
-
- PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
- }
-}
-
-void LoopVersioning::prepareNoAliasMetadata() {
- // We need to turn the no-alias relation between pointer checking groups into
- // no-aliasing annotations between instructions.
- //
- // We accomplish this by mapping each pointer checking group (a set of
- // pointers memchecked together) to an alias scope and then also mapping each
- // group to the list of scopes it can't alias.
-
- const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
- LLVMContext &Context = VersionedLoop->getHeader()->getContext();
-
- // First allocate an aliasing scope for each pointer checking group.
- //
- // While traversing through the checking groups in the loop, also create a
- // reverse map from pointers to the pointer checking group they were assigned
- // to.
- MDBuilder MDB(Context);
- MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
-
- for (const auto &Group : RtPtrChecking->CheckingGroups) {
- GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
-
- for (unsigned PtrIdx : Group.Members)
- PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
- }
-
- // Go through the checks and for each pointer group, collect the scopes for
- // each non-aliasing pointer group.
- DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
- SmallVector<Metadata *, 4>>
- GroupToNonAliasingScopes;
-
- for (const auto &Check : AliasChecks)
- GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
-
- // Finally, transform the above to actually map to scope list which is what
- // the metadata uses.
-
- for (auto Pair : GroupToNonAliasingScopes)
- GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
-}
-
-void LoopVersioning::annotateLoopWithNoAlias() {
- if (!AnnotateNoAlias)
- return;
-
- // First prepare the maps.
- prepareNoAliasMetadata();
-
- // Add the scope and no-alias metadata to the instructions.
- for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
- annotateInstWithNoAlias(I);
- }
-}
-
-void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
- const Instruction *OrigInst) {
- if (!AnnotateNoAlias)
- return;
-
- LLVMContext &Context = VersionedLoop->getHeader()->getContext();
- const Value *Ptr = isa<LoadInst>(OrigInst)
- ? cast<LoadInst>(OrigInst)->getPointerOperand()
- : cast<StoreInst>(OrigInst)->getPointerOperand();
-
- // Find the group for the pointer and then add the scope metadata.
- auto Group = PtrToGroup.find(Ptr);
- if (Group != PtrToGroup.end()) {
- VersionedInst->setMetadata(
- LLVMContext::MD_alias_scope,
- MDNode::concatenate(
- VersionedInst->getMetadata(LLVMContext::MD_alias_scope),
- MDNode::get(Context, GroupToScope[Group->second])));
-
- // Add the no-alias metadata.
- auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
- if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
- VersionedInst->setMetadata(
- LLVMContext::MD_noalias,
- MDNode::concatenate(
- VersionedInst->getMetadata(LLVMContext::MD_noalias),
- NonAliasingScopeList->second));
- }
-}
-
-namespace {
-/// Also expose this is a pass. Currently this is only used for
-/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
-/// array accesses from the loop.
-class LoopVersioningPass : public FunctionPass {
-public:
- LoopVersioningPass() : FunctionPass(ID) {
- initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
- // Build up a worklist of inner-loops to version. This is necessary as the
- // act of versioning a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
- (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
- LoopVersioning LVer(LAI, L, LI, DT, SE);
- LVer.versionLoop();
- LVer.annotateLoopWithNoAlias();
- Changed = true;
- }
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- }
-
- static char ID;
-};
-}
-
-#define LVER_OPTION "loop-versioning"
-#define DEBUG_TYPE LVER_OPTION
-
-char LoopVersioningPass::ID;
-static const char LVer_name[] = "Loop Versioning";
-
-INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
-
-namespace llvm {
-FunctionPass *createLoopVersioningPass() {
- return new LoopVersioningPass();
-}
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
deleted file mode 100644
index fe67e191dc62..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This transformation is designed for use by code generators which do not yet
-// support stack unwinding. This pass converts 'invoke' instructions to 'call'
-// instructions, so that any exception-handling 'landingpad' blocks become dead
-// code (which can be removed by running the '-simplifycfg' pass afterwards).
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LowerInvoke.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "lowerinvoke"
-
-STATISTIC(NumInvokes, "Number of invokes replaced");
-
-namespace {
- class LowerInvokeLegacyPass : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
- initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- };
-}
-
-char LowerInvokeLegacyPass::ID = 0;
-INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
- "Lower invoke and unwind, for unwindless code generators",
- false, false)
-
-static bool runImpl(Function &F) {
- bool Changed = false;
- for (BasicBlock &BB : F)
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
- SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end());
- SmallVector<OperandBundleDef, 1> OpBundles;
- II->getOperandBundlesAsDefs(OpBundles);
- // Insert a normal call instruction...
- CallInst *NewCall =
- CallInst::Create(II->getFunctionType(), II->getCalledValue(),
- CallArgs, OpBundles, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Insert an unconditional branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Remove any PHI node entries from the exception destination.
- II->getUnwindDest()->removePredecessor(&BB);
-
- // Remove the invoke instruction now.
- BB.getInstList().erase(II);
-
- ++NumInvokes;
- Changed = true;
- }
- return Changed;
-}
-
-bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
- return runImpl(F);
-}
-
-namespace llvm {
-char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
-
-// Public Interface To the LowerInvoke pass.
-FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
-
-PreservedAnalyses LowerInvokePass::run(Function &F,
- FunctionAnalysisManager &AM) {
- bool Changed = runImpl(F);
- if (!Changed)
- return PreservedAnalyses::all();
-
- return PreservedAnalyses::none();
-}
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
deleted file mode 100644
index 0cc085dc366c..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ /dev/null
@@ -1,451 +0,0 @@
-//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-static unsigned getLoopOperandSizeInBytes(Type *Type) {
- if (VectorType *VTy = dyn_cast<VectorType>(Type)) {
- return VTy->getBitWidth() / 8;
- }
-
- return Type->getPrimitiveSizeInBits() / 8;
-}
-
-void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
- Value *DstAddr, ConstantInt *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
- // No need to expand zero length copies.
- if (CopyLen->isZero())
- return;
-
- BasicBlock *PreLoopBB = InsertBefore->getParent();
- BasicBlock *PostLoopBB = nullptr;
- Function *ParentFunc = PreLoopBB->getParent();
- LLVMContext &Ctx = PreLoopBB->getContext();
-
- Type *TypeOfCopyLen = CopyLen->getType();
- Type *LoopOpType =
- TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
-
- unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
- uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
-
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
- if (LoopEndCount != 0) {
- // Split
- PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
- BasicBlock *LoopBB =
- BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
- PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);
-
- IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
-
- // Cast the Src and Dst pointers to pointers to the loop operand type (if
- // needed).
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
- // Loop Body
- Value *SrcGEP =
- LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
- Value *DstGEP =
- LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- // Create the loop branch condition.
- Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
- LoopBB, PostLoopBB);
- }
-
- uint64_t BytesCopied = LoopEndCount * LoopOpSize;
- uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
- if (RemainingBytes) {
- IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
- : InsertBefore);
-
- // Update the alignment based on the copy size used in the loop body.
- SrcAlign = std::min(SrcAlign, LoopOpSize);
- DestAlign = std::min(DestAlign, LoopOpSize);
-
- SmallVector<Type *, 5> RemainingOps;
- TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAlign, DestAlign);
-
- for (auto OpTy : RemainingOps) {
- // Calaculate the new index
- unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
- uint64_t GepIndex = BytesCopied / OperandSize;
- assert(GepIndex * OperandSize == BytesCopied &&
- "Division should have no Remainder!");
- // Cast source to operand type and load
- PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
- Value *CastedSrc = SrcAddr->getType() == SrcPtrType
- ? SrcAddr
- : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
- Value *SrcGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
-
- // Cast destination to operand type and store.
- PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
- Value *CastedDst = DstAddr->getType() == DstPtrType
- ? DstAddr
- : RBuilder.CreateBitCast(DstAddr, DstPtrType);
- Value *DstGEP = RBuilder.CreateInBoundsGEP(
- OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
- RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
- BytesCopied += OperandSize;
- }
- }
- assert(BytesCopied == CopyLen->getZExtValue() &&
- "Bytes copied should match size in the call!");
-}
-
-void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
- Value *SrcAddr, Value *DstAddr,
- Value *CopyLen, unsigned SrcAlign,
- unsigned DestAlign, bool SrcIsVolatile,
- bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
- BasicBlock *PreLoopBB = InsertBefore->getParent();
- BasicBlock *PostLoopBB =
- PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
-
- Function *ParentFunc = PreLoopBB->getParent();
- LLVMContext &Ctx = PreLoopBB->getContext();
-
- Type *LoopOpType =
- TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
- unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
-
- IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
-
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
- PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
- if (SrcAddr->getType() != SrcOpType) {
- SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
- }
- if (DstAddr->getType() != DstOpType) {
- DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
- }
-
- // Calculate the loop trip count, and remaining bytes to copy after the loop.
- Type *CopyLenType = CopyLen->getType();
- IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
- assert(ILengthType &&
- "expected size argument to memcpy to be an integer type!");
- Type *Int8Type = Type::getInt8Ty(Ctx);
- bool LoopOpIsInt8 = LoopOpType == Int8Type;
- ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
- Value *RuntimeLoopCount = LoopOpIsInt8 ?
- CopyLen :
- PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
- BasicBlock *LoopBB =
- BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
- IRBuilder<> LoopBuilder(LoopBB);
-
- PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
- LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
-
- Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
- Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- if (!LoopOpIsInt8) {
- // Add in the
- Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
- Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
-
- // Loop body for the residual copy.
- BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
- PreLoopBB->getParent(),
- PostLoopBB);
- // Residual loop header.
- BasicBlock *ResHeaderBB = BasicBlock::Create(
- Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
-
- // Need to update the pre-loop basic block to branch to the correct place.
- // branch to the main loop if the count is non-zero, branch to the residual
- // loop if the copy size is smaller then 1 iteration of the main loop but
- // non-zero and finally branch to after the residual loop if the memcpy
- // size is zero.
- ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
- PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
- LoopBB, ResHeaderBB);
- PreLoopBB->getTerminator()->eraseFromParent();
-
- LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
- ResHeaderBB);
-
- // Determine if we need to branch to the residual loop or bypass it.
- IRBuilder<> RHBuilder(ResHeaderBB);
- RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
- ResLoopBB, PostLoopBB);
-
- // Copy the residual with single byte load/store loop.
- IRBuilder<> ResBuilder(ResLoopBB);
- PHINode *ResidualIndex =
- ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
- ResidualIndex->addIncoming(Zero, ResHeaderBB);
-
- Value *SrcAsInt8 =
- ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
- Value *DstAsInt8 =
- ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
- Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
- Value *SrcGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
- Value *DstGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
- ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
- Value *ResNewIndex =
- ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
- ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
-
- // Create the loop branch condition.
- ResBuilder.CreateCondBr(
- ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
- PostLoopBB);
- } else {
- // In this case the loop operand type was a byte, and there is no need for a
- // residual loop to copy the remaining memory after the main loop.
- // We do however need to patch up the control flow by creating the
- // terminators for the preloop block and the memcpy loop.
- ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
- PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
- LoopBB, PostLoopBB);
- PreLoopBB->getTerminator()->eraseFromParent();
- LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
- PostLoopBB);
- }
-}
-
-// Lower memmove to IR. memmove is required to correctly copy overlapping memory
-// regions; therefore, it has to check the relative positions of the source and
-// destination pointers and choose the copy direction accordingly.
-//
-// The code below is an IR rendition of this C function:
-//
-// void* memmove(void* dst, const void* src, size_t n) {
-// unsigned char* d = dst;
-// const unsigned char* s = src;
-// if (s < d) {
-// // copy backwards
-// while (n--) {
-// d[n] = s[n];
-// }
-// } else {
-// // copy forward
-// for (size_t i = 0; i < n; ++i) {
-// d[i] = s[i];
-// }
-// }
-// return dst;
-// }
-static void createMemMoveLoop(Instruction *InsertBefore,
- Value *SrcAddr, Value *DstAddr, Value *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile) {
- Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *OrigBB = InsertBefore->getParent();
- Function *F = OrigBB->getParent();
-
- Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
-
- // Create the a comparison of src and dst, based on which we jump to either
- // the forward-copy part of the function (if src >= dst) or the backwards-copy
- // part (if src < dst).
- // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
- // structure. Its block terminators (unconditional branches) are replaced by
- // the appropriate conditional branches when the loop is built.
- ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
- SrcAddr, DstAddr, "compare_src_dst");
- Instruction *ThenTerm, *ElseTerm;
- SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
- &ElseTerm);
-
- // Each part of the function consists of two blocks:
- // copy_backwards: used to skip the loop when n == 0
- // copy_backwards_loop: the actual backwards loop BB
- // copy_forward: used to skip the loop when n == 0
- // copy_forward_loop: the actual forward loop BB
- BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
- CopyBackwardsBB->setName("copy_backwards");
- BasicBlock *CopyForwardBB = ElseTerm->getParent();
- CopyForwardBB->setName("copy_forward");
- BasicBlock *ExitBB = InsertBefore->getParent();
- ExitBB->setName("memmove_done");
-
- // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
- // between both backwards and forward copy clauses.
- ICmpInst *CompareN =
- new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
- ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
-
- // Copying backwards.
- BasicBlock *LoopBB =
- BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- Value *IndexPtr = LoopBuilder.CreateSub(
- LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
- Value *Element = LoopBuilder.CreateLoad(
- EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
- "element");
- LoopBuilder.CreateStore(
- Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
- LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
- ExitBB, LoopBB);
- LoopPhi->addIncoming(IndexPtr, LoopBB);
- LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
- BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
- ThenTerm->eraseFromParent();
-
- // Copying forward.
- BasicBlock *FwdLoopBB =
- BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
- IRBuilder<> FwdLoopBuilder(FwdLoopBB);
- PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
- Value *FwdElement = FwdLoopBuilder.CreateLoad(
- EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
- "element");
- FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
- Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
- FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
- FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
- ExitBB, FwdLoopBB);
- FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
- FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
-
- BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
- ElseTerm->eraseFromParent();
-}
-
-static void createMemSetLoop(Instruction *InsertBefore,
- Value *DstAddr, Value *CopyLen, Value *SetValue,
- unsigned Align, bool IsVolatile) {
- Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *OrigBB = InsertBefore->getParent();
- Function *F = OrigBB->getParent();
- BasicBlock *NewBB =
- OrigBB->splitBasicBlock(InsertBefore, "split");
- BasicBlock *LoopBB
- = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
-
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // Cast pointer to the type of value getting stored
- unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- DstAddr = Builder.CreateBitCast(DstAddr,
- PointerType::get(SetValue->getType(), dstAS));
-
- Builder.CreateCondBr(
- Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
- LoopBB);
- OrigBB->getTerminator()->eraseFromParent();
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
- LoopBuilder.CreateStore(
- SetValue,
- LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
- IsVolatile);
-
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
-void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
- const TargetTransformInfo &TTI) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
- createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ CI,
- /* SrcAlign */ Memcpy->getSourceAlignment(),
- /* DestAlign */ Memcpy->getDestAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransformInfo */ TTI);
- } else {
- createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getSourceAlignment(),
- /* DestAlign */ Memcpy->getDestAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
- }
-}
-
-void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
- createMemMoveLoop(/* InsertBefore */ Memmove,
- /* SrcAddr */ Memmove->getRawSource(),
- /* DstAddr */ Memmove->getRawDest(),
- /* CopyLen */ Memmove->getLength(),
- /* SrcAlign */ Memmove->getSourceAlignment(),
- /* DestAlign */ Memmove->getDestAlignment(),
- /* SrcIsVolatile */ Memmove->isVolatile(),
- /* DstIsVolatile */ Memmove->isVolatile());
-}
-
-void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
- createMemSetLoop(/* InsertBefore */ Memset,
- /* DstAddr */ Memset->getRawDest(),
- /* CopyLen */ Memset->getLength(),
- /* SetValue */ Memset->getValue(),
- /* Alignment */ Memset->getDestAlignment(),
- Memset->isVolatile());
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
deleted file mode 100644
index 8256e3b5f5af..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ /dev/null
@@ -1,618 +0,0 @@
-//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The LowerSwitch transformation rewrites switch instructions with a sequence
-// of branches, which allows targets to get away with not implementing the
-// switch instruction until it is convenient.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <limits>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "lower-switch"
-
-namespace {
-
- struct IntRange {
- int64_t Low, High;
- };
-
-} // end anonymous namespace
-
-// Return true iff R is covered by Ranges.
-static bool IsInRanges(const IntRange &R,
- const std::vector<IntRange> &Ranges) {
- // Note: Ranges must be sorted, non-overlapping and non-adjacent.
-
- // Find the first range whose High field is >= R.High,
- // then check if the Low field is <= R.Low. If so, we
- // have a Range that covers R.
- auto I = llvm::lower_bound(
- Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; });
- return I != Ranges.end() && I->Low <= R.Low;
-}
-
-namespace {
-
- /// Replace all SwitchInst instructions with chained branch instructions.
- class LowerSwitch : public FunctionPass {
- public:
- // Pass identification, replacement for typeid
- static char ID;
-
- LowerSwitch() : FunctionPass(ID) {
- initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LazyValueInfoWrapperPass>();
- }
-
- struct CaseRange {
- ConstantInt* Low;
- ConstantInt* High;
- BasicBlock* BB;
-
- CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
- : Low(low), High(high), BB(bb) {}
- };
-
- using CaseVector = std::vector<CaseRange>;
- using CaseItr = std::vector<CaseRange>::iterator;
-
- private:
- void processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI);
-
- BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges);
- BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound, ConstantInt *UpperBound,
- BasicBlock *OrigBlock, BasicBlock *Default);
- unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
- };
-
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const LowerSwitch::CaseRange& C1,
- const LowerSwitch::CaseRange& C2) {
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
-} // end anonymous namespace
-
-char LowerSwitch::ID = 0;
-
-// Publicly exposed interface to pass...
-char &llvm::LowerSwitchID = LowerSwitch::ID;
-
-INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
-
-// createLowerSwitchPass - Interface to this file...
-FunctionPass *llvm::createLowerSwitchPass() {
- return new LowerSwitch();
-}
-
-bool LowerSwitch::runOnFunction(Function &F) {
- LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
- AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
- // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not
- // preserve it and it becomes stale (when available) pretty much immediately.
- // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI
- // and computeKnownBits to refine isValidAssumeForContext's results. Given
- // that the latter can handle some of the simple cases w/o a DominatorTree,
- // it's easier to refrain from using the tree than to keep it up to date.
- LVI->disableDT();
-
- bool Changed = false;
- SmallPtrSet<BasicBlock*, 8> DeleteList;
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
-
- // If the block is a dead Default block that will be deleted later, don't
- // waste time processing it.
- if (DeleteList.count(Cur))
- continue;
-
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
- Changed = true;
- processSwitchInst(SI, DeleteList, AC, LVI);
- }
- }
-
- for (BasicBlock* BB: DeleteList) {
- LVI->eraseBlock(BB);
- DeleteDeadBlock(BB);
- }
-
- return Changed;
-}
-
-/// Used for debugging purposes.
-LLVM_ATTRIBUTE_USED
-static raw_ostream &operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) {
- O << "[";
-
- for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
- B != E;) {
- O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
- if (++B != E)
- O << ", ";
- }
-
- return O << "]";
-}
-
-/// Update the first occurrence of the "switch statement" BB in the PHI
-/// node with the "new" BB. The other occurrences will:
-///
-/// 1) Be updated by subsequent calls to this function. Switch statements may
-/// have more than one outcoming edge into the same BB if they all have the same
-/// value. When the switch statement is converted these incoming edges are now
-/// coming from multiple BBs.
-/// 2) Removed if subsequent incoming values now share the same case, i.e.,
-/// multiple outcome edges are condensed into one. This is necessary to keep the
-/// number of phi values equal to the number of branches to SuccBB.
-static void
-fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
- const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
- for (BasicBlock::iterator I = SuccBB->begin(),
- IE = SuccBB->getFirstNonPHI()->getIterator();
- I != IE; ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // Only update the first occurrence.
- unsigned Idx = 0, E = PN->getNumIncomingValues();
- unsigned LocalNumMergedCases = NumMergedCases;
- for (; Idx != E; ++Idx) {
- if (PN->getIncomingBlock(Idx) == OrigBB) {
- PN->setIncomingBlock(Idx, NewBB);
- break;
- }
- }
-
- // Remove additional occurrences coming from condensed cases and keep the
- // number of incoming values equal to the number of branches to SuccBB.
- SmallVector<unsigned, 8> Indices;
- for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
- if (PN->getIncomingBlock(Idx) == OrigBB) {
- Indices.push_back(Idx);
- LocalNumMergedCases--;
- }
- // Remove incoming values in the reverse order to prevent invalidating
- // *successive* index.
- for (unsigned III : llvm::reverse(Indices))
- PN->removeIncomingValue(III);
- }
-}
-
-/// Convert the switch statement into a binary lookup of the case values.
-/// The function recursively builds this tree. LowerBound and UpperBound are
-/// used to keep track of the bounds for Val that have already been checked by
-/// a block emitted by one of the previous calls to switchConvert in the call
-/// stack.
-BasicBlock *
-LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor, BasicBlock *OrigBlock,
- BasicBlock *Default,
- const std::vector<IntRange> &UnreachableRanges) {
- assert(LowerBound && UpperBound && "Bounds must be initialized");
- unsigned Size = End - Begin;
-
- if (Size == 1) {
- // Check if the Case Range is perfectly squeezed in between
- // already checked Upper and Lower bounds. If it is then we can avoid
- // emitting the code that checks if the value actually falls in the range
- // because the bounds already tell us so.
- if (Begin->Low == LowerBound && Begin->High == UpperBound) {
- unsigned NumMergedCases = 0;
- NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
- fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
- return Begin->BB;
- }
- return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
- Default);
- }
-
- unsigned Mid = Size / 2;
- std::vector<CaseRange> LHS(Begin, Begin + Mid);
- LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n");
- std::vector<CaseRange> RHS(Begin + Mid, End);
- LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
-
- CaseRange &Pivot = *(Begin + Mid);
- LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", "
- << Pivot.High->getValue() << "]\n");
-
- // NewLowerBound here should never be the integer minimal value.
- // This is because it is computed from a case range that is never
- // the smallest, so there is always a case range that has at least
- // a smaller value.
- ConstantInt *NewLowerBound = Pivot.Low;
-
- // Because NewLowerBound is never the smallest representable integer
- // it is safe here to subtract one.
- ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
- NewLowerBound->getValue() - 1);
-
- if (!UnreachableRanges.empty()) {
- // Check if the gap between LHS's highest and NewLowerBound is unreachable.
- int64_t GapLow = LHS.back().High->getSExtValue() + 1;
- int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
- IntRange Gap = { GapLow, GapHigh };
- if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
- NewUpperBound = LHS.back().High;
- }
-
- LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", "
- << NewUpperBound->getSExtValue() << "]\n"
- << "RHS Bounds ==> [" << NewLowerBound->getSExtValue()
- << ", " << UpperBound->getSExtValue() << "]\n");
-
- // Create a new node that checks if the value is < pivot. Go to the
- // left branch if it is and right branch if not.
- Function* F = OrigBlock->getParent();
- BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
-
- ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
- Val, Pivot.Low, "Pivot");
-
- BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
- NewUpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
- BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
- UpperBound, Val, NewNode, OrigBlock,
- Default, UnreachableRanges);
-
- F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
- NewNode->getInstList().push_back(Comp);
-
- BranchInst::Create(LBranch, RBranch, Comp, NewNode);
- return NewNode;
-}
-
-/// Create a new leaf block for the binary lookup tree. It checks if the
-/// switch's value == the case's value. If not, then it jumps to the default
-/// branch. At this point in the tree, the value can't be another valid case
-/// value, so the jump to the "default" branch is warranted.
-BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
- Function* F = OrigBlock->getParent();
- BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
- F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
-
- // Emit comparison
- ICmpInst* Comp = nullptr;
- if (Leaf.Low == Leaf.High) {
- // Make the seteq instruction...
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
- Leaf.Low, "SwitchLeaf");
- } else {
- // Make range comparison
- if (Leaf.Low == LowerBound) {
- // Val >= Min && Val <= Hi --> Val <= Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
- "SwitchLeaf");
- } else if (Leaf.High == UpperBound) {
- // Val <= Max && Val >= Lo --> Val >= Lo
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
- "SwitchLeaf");
- } else if (Leaf.Low->isZero()) {
- // Val >= 0 && Val <= Hi --> Val <=u Hi
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
- "SwitchLeaf");
- } else {
- // Emit V-Lo <=u Hi-Lo
- Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
- Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
- Val->getName()+".off",
- NewLeaf);
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
- Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
- "SwitchLeaf");
- }
- }
-
- // Make the conditional branch...
- BasicBlock* Succ = Leaf.BB;
- BranchInst::Create(Succ, Default, Comp, NewLeaf);
-
- // If there were any PHI nodes in this successor, rewrite one entry
- // from OrigBlock to come from NewLeaf.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode* PN = cast<PHINode>(I);
- // Remove all but one incoming entries from the cluster
- uint64_t Range = Leaf.High->getSExtValue() -
- Leaf.Low->getSExtValue();
- for (uint64_t j = 0; j < Range; ++j) {
- PN->removeIncomingValue(OrigBlock);
- }
-
- int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
- assert(BlockIdx != -1 && "Switch didn't go to this successor??");
- PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
- }
-
- return NewLeaf;
-}
-
-/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
-/// \post \p Cases wouldn't contain references to \p SI's default BB.
-/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
-unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
- unsigned NumSimpleCases = 0;
-
- // Start with "simple" cases
- for (auto Case : SI->cases()) {
- if (Case.getCaseSuccessor() == SI->getDefaultDest())
- continue;
- Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
- Case.getCaseSuccessor()));
- ++NumSimpleCases;
- }
-
- llvm::sort(Cases, CaseCmp());
-
- // Merge case into clusters
- if (Cases.size() >= 2) {
- CaseItr I = Cases.begin();
- for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) {
- int64_t nextValue = J->Low->getSExtValue();
- int64_t currentValue = I->High->getSExtValue();
- BasicBlock* nextBB = J->BB;
- BasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- assert(nextValue > currentValue && "Cases should be strictly ascending");
- if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
- I->High = J->High;
- // FIXME: Combine branch weights.
- } else if (++I != J) {
- *I = *J;
- }
- }
- Cases.erase(std::next(I), Cases.end());
- }
-
- return NumSimpleCases;
-}
-
-/// Replace the specified switch instruction with a sequence of chained if-then
-/// insts in a balanced binary search.
-void LowerSwitch::processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock *> &DeleteList,
- AssumptionCache *AC, LazyValueInfo *LVI) {
- BasicBlock *OrigBlock = SI->getParent();
- Function *F = OrigBlock->getParent();
- Value *Val = SI->getCondition(); // The value we are switching on...
- BasicBlock* Default = SI->getDefaultDest();
-
- // Don't handle unreachable blocks. If there are successors with phis, this
- // would leave them behind with missing predecessors.
- if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
- OrigBlock->getSinglePredecessor() == OrigBlock) {
- DeleteList.insert(OrigBlock);
- return;
- }
-
- // Prepare cases vector.
- CaseVector Cases;
- const unsigned NumSimpleCases = Clusterify(Cases, SI);
- LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total non-default cases: " << NumSimpleCases
- << "\nCase clusters: " << Cases << "\n");
-
- // If there is only the default destination, just branch.
- if (Cases.empty()) {
- BranchInst::Create(Default, OrigBlock);
- // Remove all the references from Default's PHIs to OrigBlock, but one.
- fixPhis(Default, OrigBlock, OrigBlock);
- SI->eraseFromParent();
- return;
- }
-
- ConstantInt *LowerBound = nullptr;
- ConstantInt *UpperBound = nullptr;
- bool DefaultIsUnreachableFromSwitch = false;
-
- if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
- // Make the bounds tightly fitted around the case value range, because we
- // know that the value passed to the switch must be exactly one of the case
- // values.
- LowerBound = Cases.front().Low;
- UpperBound = Cases.back().High;
- DefaultIsUnreachableFromSwitch = true;
- } else {
- // Constraining the range of the value being switched over helps eliminating
- // unreachable BBs and minimizing the number of `add` instructions
- // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after
- // LowerSwitch isn't as good, and also much more expensive in terms of
- // compile time for the following reasons:
- // 1. it processes many kinds of instructions, not just switches;
- // 2. even if limited to icmp instructions only, it will have to process
- // roughly C icmp's per switch, where C is the number of cases in the
- // switch, while LowerSwitch only needs to call LVI once per switch.
- const DataLayout &DL = F->getParent()->getDataLayout();
- KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
- // TODO Shouldn't this create a signed range?
- ConstantRange KnownBitsRange =
- ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
- const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
- ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
- // We delegate removal of unreachable non-default cases to other passes. In
- // the unlikely event that some of them survived, we just conservatively
- // maintain the invariant that all the cases lie between the bounds. This
- // may, however, still render the default case effectively unreachable.
- APInt Low = Cases.front().Low->getValue();
- APInt High = Cases.back().High->getValue();
- APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low);
- APInt Max = APIntOps::smax(ValRange.getSignedMax(), High);
-
- LowerBound = ConstantInt::get(SI->getContext(), Min);
- UpperBound = ConstantInt::get(SI->getContext(), Max);
- DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max);
- }
-
- std::vector<IntRange> UnreachableRanges;
-
- if (DefaultIsUnreachableFromSwitch) {
- DenseMap<BasicBlock *, unsigned> Popularity;
- unsigned MaxPop = 0;
- BasicBlock *PopSucc = nullptr;
-
- IntRange R = {std::numeric_limits<int64_t>::min(),
- std::numeric_limits<int64_t>::max()};
- UnreachableRanges.push_back(R);
- for (const auto &I : Cases) {
- int64_t Low = I.Low->getSExtValue();
- int64_t High = I.High->getSExtValue();
-
- IntRange &LastRange = UnreachableRanges.back();
- if (LastRange.Low == Low) {
- // There is nothing left of the previous range.
- UnreachableRanges.pop_back();
- } else {
- // Terminate the previous range.
- assert(Low > LastRange.Low);
- LastRange.High = Low - 1;
- }
- if (High != std::numeric_limits<int64_t>::max()) {
- IntRange R = { High + 1, std::numeric_limits<int64_t>::max() };
- UnreachableRanges.push_back(R);
- }
-
- // Count popularity.
- int64_t N = High - Low + 1;
- unsigned &Pop = Popularity[I.BB];
- if ((Pop += N) > MaxPop) {
- MaxPop = Pop;
- PopSucc = I.BB;
- }
- }
-#ifndef NDEBUG
- /* UnreachableRanges should be sorted and the ranges non-adjacent. */
- for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
- I != E; ++I) {
- assert(I->Low <= I->High);
- auto Next = I + 1;
- if (Next != E) {
- assert(Next->Low > I->High);
- }
- }
-#endif
-
- // As the default block in the switch is unreachable, update the PHI nodes
- // (remove all of the references to the default block) to reflect this.
- const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases;
- for (unsigned I = 0; I < NumDefaultEdges; ++I)
- Default->removePredecessor(OrigBlock);
-
- // Use the most popular block as the new default, reducing the number of
- // cases.
- assert(MaxPop > 0 && PopSucc);
- Default = PopSucc;
- Cases.erase(
- llvm::remove_if(
- Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
- Cases.end());
-
- // If there are no cases left, just branch.
- if (Cases.empty()) {
- BranchInst::Create(Default, OrigBlock);
- SI->eraseFromParent();
- // As all the cases have been replaced with a single branch, only keep
- // one entry in the PHI nodes.
- for (unsigned I = 0 ; I < (MaxPop - 1) ; ++I)
- PopSucc->removePredecessor(OrigBlock);
- return;
- }
-
- // If the condition was a PHI node with the switch block as a predecessor
- // removing predecessors may have caused the condition to be erased.
- // Getting the condition value again here protects against that.
- Val = SI->getCondition();
- }
-
- // Create a new, empty default block so that the new hierarchy of
- // if-then statements go to this and the PHI nodes are happy.
- BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
- BranchInst::Create(Default, NewDefault);
-
- BasicBlock *SwitchBlock =
- switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
-
- // If there are entries in any PHI nodes for the default edge, make sure
- // to update them as well.
- fixPhis(Default, OrigBlock, NewDefault);
-
- // Branch to our shiny new if-then stuff...
- BranchInst::Create(SwitchBlock, OrigBlock);
-
- // We are now done with the switch instruction, delete it.
- BasicBlock *OldDefault = SI->getDefaultDest();
- OrigBlock->getInstList().erase(SI);
-
- // If the Default block has no more predecessors just add it to DeleteList.
- if (pred_begin(OldDefault) == pred_end(OldDefault))
- DeleteList.insert(OldDefault);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
deleted file mode 100644
index cd2c81b6abc8..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is a simple pass wrapper around the PromoteMemToReg function call
-// exposed by the Utils library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Mem2Reg.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "mem2reg"
-
-STATISTIC(NumPromoted, "Number of alloca's promoted");
-
-static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
- AssumptionCache &AC) {
- std::vector<AllocaInst *> Allocas;
- BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
- bool Changed = false;
-
- while (true) {
- Allocas.clear();
-
- // Find allocas that are safe to promote, by looking at all instructions in
- // the entry node
- for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
- Allocas.push_back(AI);
-
- if (Allocas.empty())
- break;
-
- PromoteMemToReg(Allocas, DT, &AC);
- NumPromoted += Allocas.size();
- Changed = true;
- }
- return Changed;
-}
-
-PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- if (!promoteMemoryToRegister(F, DT, AC))
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
-
-namespace {
-
-struct PromoteLegacyPass : public FunctionPass {
- // Pass identification, replacement for typeid
- static char ID;
-
- PromoteLegacyPass() : FunctionPass(ID) {
- initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // runOnFunction - To run this pass, first we calculate the alloca
- // instructions that are safe for promotion, then we promote each one.
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- return promoteMemoryToRegister(F, DT, AC);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
- }
-};
-
-} // end anonymous namespace
-
-char PromoteLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
- "Register",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
- false, false)
-
-// createPromoteMemoryToRegister - Provide an entry point to create this pass.
-FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
- return new PromoteLegacyPass();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
deleted file mode 100644
index c0b7edc547fd..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass renames everything with metasyntatic names. The intent is to use
-// this pass after bugpoint reduction to conceal the nature of the original
-// program.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-
-using namespace llvm;
-
-static const char *const metaNames[] = {
- // See http://en.wikipedia.org/wiki/Metasyntactic_variable
- "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
- "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-};
-
-namespace {
-
- // This PRNG is from the ISO C spec. It is intentionally simple and
- // unsuitable for cryptographic use. We're just looking for enough
- // variety to surprise and delight users.
- struct PRNG {
- unsigned long next;
-
- void srand(unsigned int seed) {
- next = seed;
- }
-
- int rand() {
- next = next * 1103515245 + 12345;
- return (unsigned int)(next / 65536) % 32768;
- }
- };
-
- struct Renamer {
- Renamer(unsigned int seed) {
- prng.srand(seed);
- }
-
- const char *newName() {
- return metaNames[prng.rand() % array_lengthof(metaNames)];
- }
-
- PRNG prng;
- };
-
- struct MetaRenamer : public ModulePass {
- // Pass identification, replacement for typeid
- static char ID;
-
- MetaRenamer() : ModulePass(ID) {
- initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- // Seed our PRNG with simple additive sum of ModuleID. We're looking to
- // simply avoid always having the same function names, and we need to
- // remain deterministic.
- unsigned int randSeed = 0;
- for (auto C : M.getModuleIdentifier())
- randSeed += C;
-
- Renamer renamer(randSeed);
-
- // Rename all aliases
- for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) {
- StringRef Name = AI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- AI->setName("alias");
- }
-
- // Rename all global variables
- for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
- StringRef Name = GI->getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
- continue;
-
- GI->setName("global");
- }
-
- // Rename all struct types
- TypeFinder StructTypes;
- StructTypes.run(M, true);
- for (StructType *STy : StructTypes) {
- if (STy->isLiteral() || STy->getName().empty()) continue;
-
- SmallString<128> NameStorage;
- STy->setName((Twine("struct.") +
- renamer.newName()).toStringRef(NameStorage));
- }
-
- // Rename all functions
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- for (auto &F : M) {
- StringRef Name = F.getName();
- LibFunc Tmp;
- // Leave library functions alone because their presence or absence could
- // affect the behavior of other passes.
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- TLI.getLibFunc(F, Tmp))
- continue;
-
- // Leave @main alone. The output of -metarenamer might be passed to
- // lli for execution and the latter needs a main entry point.
- if (Name != "main")
- F.setName(renamer.newName());
-
- runOnFunction(F);
- }
- return true;
- }
-
- bool runOnFunction(Function &F) {
- for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
- if (!AI->getType()->isVoidTy())
- AI->setName("arg");
-
- for (auto &BB : F) {
- BB.setName("bb");
-
- for (auto &I : BB)
- if (!I.getType()->isVoidTy())
- I.setName("tmp");
- }
- return true;
- }
- };
-
-} // end anonymous namespace
-
-char MetaRenamer::ID = 0;
-
-INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
-
-//===----------------------------------------------------------------------===//
-//
-// MetaRenamer - Rename everything with metasyntactic names.
-//
-ModulePass *llvm::createMetaRenamerPass() {
- return new MetaRenamer();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
deleted file mode 100644
index c84beceee191..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of functions perform manipulations on Modules.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static void appendToGlobalArray(const char *Array, Module &M, Function *F,
- int Priority, Constant *Data) {
- IRBuilder<> IRB(M.getContext());
- FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
-
- // Get the current set of static global constructors and add the new ctor
- // to the list.
- SmallVector<Constant *, 16> CurrentCtors;
- StructType *EltTy = StructType::get(
- IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy());
- if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
- if (Constant *Init = GVCtor->getInitializer()) {
- unsigned n = Init->getNumOperands();
- CurrentCtors.reserve(n + 1);
- for (unsigned i = 0; i != n; ++i)
- CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
- }
- GVCtor->eraseFromParent();
- }
-
- // Build a 3 field global_ctor entry. We don't take a comdat key.
- Constant *CSVals[3];
- CSVals[0] = IRB.getInt32(Priority);
- CSVals[1] = F;
- CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
- : Constant::getNullValue(IRB.getInt8PtrTy());
- Constant *RuntimeCtorInit =
- ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
-
- CurrentCtors.push_back(RuntimeCtorInit);
-
- // Create a new initializer.
- ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
- Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
-
- // Create the new global variable and replace all uses of
- // the old global variable with the new one.
- (void)new GlobalVariable(M, NewInit->getType(), false,
- GlobalValue::AppendingLinkage, NewInit, Array);
-}
-
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
- appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
-}
-
-void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
- appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
-}
-
-static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
- GlobalVariable *GV = M.getGlobalVariable(Name);
- SmallPtrSet<Constant *, 16> InitAsSet;
- SmallVector<Constant *, 16> Init;
- if (GV) {
- ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
- for (auto &Op : CA->operands()) {
- Constant *C = cast_or_null<Constant>(Op);
- if (InitAsSet.insert(C).second)
- Init.push_back(C);
- }
- GV->eraseFromParent();
- }
-
- Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
- for (auto *V : Values) {
- Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
- if (InitAsSet.insert(C).second)
- Init.push_back(C);
- }
-
- if (Init.empty())
- return;
-
- ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
- GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
- ConstantArray::get(ATy, Init), Name);
- GV->setSection("llvm.metadata");
-}
-
-void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
- appendToUsedList(M, "llvm.used", Values);
-}
-
-void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
- appendToUsedList(M, "llvm.compiler.used", Values);
-}
-
-FunctionCallee
-llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
- ArrayRef<Type *> InitArgTypes) {
- assert(!InitName.empty() && "Expected init function name");
- return M.getOrInsertFunction(
- InitName,
- FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
- AttributeList());
-}
-
-std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
- Module &M, StringRef CtorName, StringRef InitName,
- ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
- StringRef VersionCheckName) {
- assert(!InitName.empty() && "Expected init function name");
- assert(InitArgs.size() == InitArgTypes.size() &&
- "Sanitizer's init function expects different number of arguments");
- FunctionCallee InitFunction =
- declareSanitizerInitFunction(M, InitName, InitArgTypes);
- Function *Ctor = Function::Create(
- FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalValue::InternalLinkage, CtorName, &M);
- BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
- IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
- IRB.CreateCall(InitFunction, InitArgs);
- if (!VersionCheckName.empty()) {
- FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
- VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
- AttributeList());
- IRB.CreateCall(VersionCheckFunction, {});
- }
- return std::make_pair(Ctor, InitFunction);
-}
-
-std::pair<Function *, FunctionCallee>
-llvm::getOrCreateSanitizerCtorAndInitFunctions(
- Module &M, StringRef CtorName, StringRef InitName,
- ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
- function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
- StringRef VersionCheckName) {
- assert(!CtorName.empty() && "Expected ctor function name");
-
- if (Function *Ctor = M.getFunction(CtorName))
- // FIXME: Sink this logic into the module, similar to the handling of
- // globals. This will make moving to a concurrent model much easier.
- if (Ctor->arg_size() == 0 ||
- Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
- return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
-
- Function *Ctor;
- FunctionCallee InitFunction;
- std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
- M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
- FunctionsCreatedCallback(Ctor, InitFunction);
- return std::make_pair(Ctor, InitFunction);
-}
-
-Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
- assert(!Name.empty() && "Expected init function name");
- if (Function *F = M.getFunction(Name)) {
- if (F->arg_size() != 0 ||
- F->getReturnType() != Type::getVoidTy(M.getContext())) {
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "Sanitizer interface function defined with wrong type: " << *F;
- report_fatal_error(Err);
- }
- return F;
- }
- Function *F =
- cast<Function>(M.getOrInsertFunction(Name, AttributeList(),
- Type::getVoidTy(M.getContext()))
- .getCallee());
-
- appendToGlobalCtors(M, F, 0);
-
- return F;
-}
-
-void llvm::filterDeadComdatFunctions(
- Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
- // Build a map from the comdat to the number of entries in that comdat we
- // think are dead. If this fully covers the comdat group, then the entire
- // group is dead. If we find another entry in the comdat group though, we'll
- // have to preserve the whole group.
- SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
- for (Function *F : DeadComdatFunctions) {
- Comdat *C = F->getComdat();
- assert(C && "Expected all input GVs to be in a comdat!");
- ComdatEntriesCovered[C] += 1;
- }
-
- auto CheckComdat = [&](Comdat &C) {
- auto CI = ComdatEntriesCovered.find(&C);
- if (CI == ComdatEntriesCovered.end())
- return;
-
- // If this could have been covered by a dead entry, just subtract one to
- // account for it.
- if (CI->second > 0) {
- CI->second -= 1;
- return;
- }
-
- // If we've already accounted for all the entries that were dead, the
- // entire comdat is alive so remove it from the map.
- ComdatEntriesCovered.erase(CI);
- };
-
- auto CheckAllComdats = [&] {
- for (Function &F : M.functions())
- if (Comdat *C = F.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- for (GlobalVariable &GV : M.globals())
- if (Comdat *C = GV.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- for (GlobalAlias &GA : M.aliases())
- if (Comdat *C = GA.getComdat()) {
- CheckComdat(*C);
- if (ComdatEntriesCovered.empty())
- return;
- }
- };
- CheckAllComdats();
-
- if (ComdatEntriesCovered.empty()) {
- DeadComdatFunctions.clear();
- return;
- }
-
- // Remove the entries that were not covering.
- erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
- return ComdatEntriesCovered.find(GV->getComdat()) ==
- ComdatEntriesCovered.end();
- });
-}
-
-std::string llvm::getUniqueModuleId(Module *M) {
- MD5 Md5;
- bool ExportsSymbols = false;
- auto AddGlobal = [&](GlobalValue &GV) {
- if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
- !GV.hasExternalLinkage() || GV.hasComdat())
- return;
- ExportsSymbols = true;
- Md5.update(GV.getName());
- Md5.update(ArrayRef<uint8_t>{0});
- };
-
- for (auto &F : *M)
- AddGlobal(F);
- for (auto &GV : M->globals())
- AddGlobal(GV);
- for (auto &GA : M->aliases())
- AddGlobal(GA);
- for (auto &IF : M->ifuncs())
- AddGlobal(IF);
-
- if (!ExportsSymbols)
- return "";
-
- MD5::MD5Result R;
- Md5.final(R);
-
- SmallString<32> Str;
- MD5::stringifyResult(R, Str);
- return ("$" + Str).str();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
deleted file mode 100644
index ac8991e9d475..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements naming anonymous globals to make sure they can be
-// referred to by ThinLTO.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/NameAnonGlobals.h"
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MD5.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-namespace {
-// Compute a "unique" hash for the module based on the name of the public
-// globals.
-class ModuleHasher {
- Module &TheModule;
- std::string TheHash;
-
-public:
- ModuleHasher(Module &M) : TheModule(M) {}
-
- /// Return the lazily computed hash.
- std::string &get() {
- if (!TheHash.empty())
- // Cache hit :)
- return TheHash;
-
- MD5 Hasher;
- for (auto &F : TheModule) {
- if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName())
- continue;
- auto Name = F.getName();
- Hasher.update(Name);
- }
- for (auto &GV : TheModule.globals()) {
- if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName())
- continue;
- auto Name = GV.getName();
- Hasher.update(Name);
- }
-
- // Now return the result.
- MD5::MD5Result Hash;
- Hasher.final(Hash);
- SmallString<32> Result;
- MD5::stringifyResult(Hash, Result);
- TheHash = Result.str();
- return TheHash;
- }
-};
-} // end anonymous namespace
-
-// Rename all the anon globals in the module
-bool llvm::nameUnamedGlobals(Module &M) {
- bool Changed = false;
- ModuleHasher ModuleHash(M);
- int count = 0;
- auto RenameIfNeed = [&](GlobalValue &GV) {
- if (GV.hasName())
- return;
- GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
- Changed = true;
- };
- for (auto &GO : M.global_objects())
- RenameIfNeed(GO);
- for (auto &GA : M.aliases())
- RenameIfNeed(GA);
-
- return Changed;
-}
-
-namespace {
-
-// Legacy pass that provides a name to every anon globals.
-class NameAnonGlobalLegacyPass : public ModulePass {
-
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- /// Specify pass name for debug output
- StringRef getPassName() const override { return "Name Anon Globals"; }
-
- explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override { return nameUnamedGlobals(M); }
-};
-char NameAnonGlobalLegacyPass::ID = 0;
-
-} // anonymous namespace
-
-PreservedAnalyses NameAnonGlobalPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- if (!nameUnamedGlobals(M))
- return PreservedAnalyses::all();
-
- return PreservedAnalyses::none();
-}
-
-INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals",
- "Provide a name to nameless globals", false, false)
-INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals",
- "Provide a name to nameless globals", false, false)
-
-namespace llvm {
-ModulePass *createNameAnonGlobalPass() {
- return new NameAnonGlobalLegacyPass();
-}
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
deleted file mode 100644
index bdf24d80bd17..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ /dev/null
@@ -1,852 +0,0 @@
-//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------===//
-//
-// This file implements the PredicateInfo class.
-//
-//===----------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/PredicateInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugCounter.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Transforms/Utils.h"
-#include <algorithm>
-#define DEBUG_TYPE "predicateinfo"
-using namespace llvm;
-using namespace PatternMatch;
-using namespace llvm::PredicateInfoClasses;
-
-INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
- "PredicateInfo Printer", false, false)
-static cl::opt<bool> VerifyPredicateInfo(
- "verify-predicateinfo", cl::init(false), cl::Hidden,
- cl::desc("Verify PredicateInfo in legacy printer pass."));
-DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
- "Controls which variables are renamed with predicateinfo");
-
-namespace {
-// Given a predicate info that is a type of branching terminator, get the
-// branching block.
-const BasicBlock *getBranchBlock(const PredicateBase *PB) {
- assert(isa<PredicateWithEdge>(PB) &&
- "Only branches and switches should have PHIOnly defs that "
- "require branch blocks.");
- return cast<PredicateWithEdge>(PB)->From;
-}
-
-// Given a predicate info that is a type of branching terminator, get the
-// branching terminator.
-static Instruction *getBranchTerminator(const PredicateBase *PB) {
- assert(isa<PredicateWithEdge>(PB) &&
- "Not a predicate info type we know how to get a terminator from.");
- return cast<PredicateWithEdge>(PB)->From->getTerminator();
-}
-
-// Given a predicate info that is a type of branching terminator, get the
-// edge this predicate info represents
-const std::pair<BasicBlock *, BasicBlock *>
-getBlockEdge(const PredicateBase *PB) {
- assert(isa<PredicateWithEdge>(PB) &&
- "Not a predicate info type we know how to get an edge from.");
- const auto *PEdge = cast<PredicateWithEdge>(PB);
- return std::make_pair(PEdge->From, PEdge->To);
-}
-}
-
-namespace llvm {
-namespace PredicateInfoClasses {
-enum LocalNum {
- // Operations that must appear first in the block.
- LN_First,
- // Operations that are somewhere in the middle of the block, and are sorted on
- // demand.
- LN_Middle,
- // Operations that must appear last in a block, like successor phi node uses.
- LN_Last
-};
-
-// Associate global and local DFS info with defs and uses, so we can sort them
-// into a global domination ordering.
-struct ValueDFS {
- int DFSIn = 0;
- int DFSOut = 0;
- unsigned int LocalNum = LN_Middle;
- // Only one of Def or Use will be set.
- Value *Def = nullptr;
- Use *U = nullptr;
- // Neither PInfo nor EdgeOnly participate in the ordering
- PredicateBase *PInfo = nullptr;
- bool EdgeOnly = false;
-};
-
-// Perform a strict weak ordering on instructions and arguments.
-static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
- const Value *B) {
- auto *ArgA = dyn_cast_or_null<Argument>(A);
- auto *ArgB = dyn_cast_or_null<Argument>(B);
- if (ArgA && !ArgB)
- return true;
- if (ArgB && !ArgA)
- return false;
- if (ArgA && ArgB)
- return ArgA->getArgNo() < ArgB->getArgNo();
- return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B));
-}
-
-// This compares ValueDFS structures, creating OrderedBasicBlocks where
-// necessary to compare uses/defs in the same block. Doing so allows us to walk
-// the minimum number of instructions necessary to compute our def/use ordering.
-struct ValueDFS_Compare {
- OrderedInstructions &OI;
- ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
-
- bool operator()(const ValueDFS &A, const ValueDFS &B) const {
- if (&A == &B)
- return false;
- // The only case we can't directly compare them is when they in the same
- // block, and both have localnum == middle. In that case, we have to use
- // comesbefore to see what the real ordering is, because they are in the
- // same basic block.
-
- bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
-
- // We want to put the def that will get used for a given set of phi uses,
- // before those phi uses.
- // So we sort by edge, then by def.
- // Note that only phi nodes uses and defs can come last.
- if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
- return comparePHIRelated(A, B);
-
- if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
- return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
- std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
- return localComesBefore(A, B);
- }
-
- // For a phi use, or a non-materialized def, return the edge it represents.
- const std::pair<BasicBlock *, BasicBlock *>
- getBlockEdge(const ValueDFS &VD) const {
- if (!VD.Def && VD.U) {
- auto *PHI = cast<PHINode>(VD.U->getUser());
- return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
- }
- // This is really a non-materialized def.
- return ::getBlockEdge(VD.PInfo);
- }
-
- // For two phi related values, return the ordering.
- bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
- auto &ABlockEdge = getBlockEdge(A);
- auto &BBlockEdge = getBlockEdge(B);
- // Now sort by block edge and then defs before uses.
- return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
- }
-
- // Get the definition of an instruction that occurs in the middle of a block.
- Value *getMiddleDef(const ValueDFS &VD) const {
- if (VD.Def)
- return VD.Def;
- // It's possible for the defs and uses to be null. For branches, the local
- // numbering will say the placed predicaeinfos should go first (IE
- // LN_beginning), so we won't be in this function. For assumes, we will end
- // up here, beause we need to order the def we will place relative to the
- // assume. So for the purpose of ordering, we pretend the def is the assume
- // because that is where we will insert the info.
- if (!VD.U) {
- assert(VD.PInfo &&
- "No def, no use, and no predicateinfo should not occur");
- assert(isa<PredicateAssume>(VD.PInfo) &&
- "Middle of block should only occur for assumes");
- return cast<PredicateAssume>(VD.PInfo)->AssumeInst;
- }
- return nullptr;
- }
-
- // Return either the Def, if it's not null, or the user of the Use, if the def
- // is null.
- const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
- if (Def)
- return cast<Instruction>(Def);
- return cast<Instruction>(U->getUser());
- }
-
- // This performs the necessary local basic block ordering checks to tell
- // whether A comes before B, where both are in the same basic block.
- bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
- auto *ADef = getMiddleDef(A);
- auto *BDef = getMiddleDef(B);
-
- // See if we have real values or uses. If we have real values, we are
- // guaranteed they are instructions or arguments. No matter what, we are
- // guaranteed they are in the same block if they are instructions.
- auto *ArgA = dyn_cast_or_null<Argument>(ADef);
- auto *ArgB = dyn_cast_or_null<Argument>(BDef);
-
- if (ArgA || ArgB)
- return valueComesBefore(OI, ArgA, ArgB);
-
- auto *AInst = getDefOrUser(ADef, A.U);
- auto *BInst = getDefOrUser(BDef, B.U);
- return valueComesBefore(OI, AInst, BInst);
- }
-};
-
-} // namespace PredicateInfoClasses
-
-bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
- const ValueDFS &VDUse) const {
- if (Stack.empty())
- return false;
- // If it's a phi only use, make sure it's for this phi node edge, and that the
- // use is in a phi node. If it's anything else, and the top of the stack is
- // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to
- // the defs they must go with so that we can know it's time to pop the stack
- // when we hit the end of the phi uses for a given def.
- if (Stack.back().EdgeOnly) {
- if (!VDUse.U)
- return false;
- auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
- if (!PHI)
- return false;
- // Check edge
- BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U);
- if (EdgePred != getBranchBlock(Stack.back().PInfo))
- return false;
-
- // Use dominates, which knows how to handle edge dominance.
- return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U);
- }
-
- return (VDUse.DFSIn >= Stack.back().DFSIn &&
- VDUse.DFSOut <= Stack.back().DFSOut);
-}
-
-void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
- const ValueDFS &VD) {
- while (!Stack.empty() && !stackIsInScope(Stack, VD))
- Stack.pop_back();
-}
-
-// Convert the uses of Op into a vector of uses, associating global and local
-// DFS info with each one.
-void PredicateInfo::convertUsesToDFSOrdered(
- Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
- for (auto &U : Op->uses()) {
- if (auto *I = dyn_cast<Instruction>(U.getUser())) {
- ValueDFS VD;
- // Put the phi node uses in the incoming block.
- BasicBlock *IBlock;
- if (auto *PN = dyn_cast<PHINode>(I)) {
- IBlock = PN->getIncomingBlock(U);
- // Make phi node users appear last in the incoming block
- // they are from.
- VD.LocalNum = LN_Last;
- } else {
- // If it's not a phi node use, it is somewhere in the middle of the
- // block.
- IBlock = I->getParent();
- VD.LocalNum = LN_Middle;
- }
- DomTreeNode *DomNode = DT.getNode(IBlock);
- // It's possible our use is in an unreachable block. Skip it if so.
- if (!DomNode)
- continue;
- VD.DFSIn = DomNode->getDFSNumIn();
- VD.DFSOut = DomNode->getDFSNumOut();
- VD.U = &U;
- DFSOrderedSet.push_back(VD);
- }
- }
-}
-
-// Collect relevant operations from Comparison that we may want to insert copies
-// for.
-void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
- auto *Op0 = Comparison->getOperand(0);
- auto *Op1 = Comparison->getOperand(1);
- if (Op0 == Op1)
- return;
- CmpOperands.push_back(Comparison);
- // Only want real values, not constants. Additionally, operands with one use
- // are only being used in the comparison, which means they will not be useful
- // for us to consider for predicateinfo.
- //
- if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
- CmpOperands.push_back(Op0);
- if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
- CmpOperands.push_back(Op1);
-}
-
-// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
-void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
- PredicateBase *PB) {
- OpsToRename.insert(Op);
- auto &OperandInfo = getOrCreateValueInfo(Op);
- AllInfos.push_back(PB);
- OperandInfo.Infos.push_back(PB);
-}
-
-// Process an assume instruction and place relevant operations we want to rename
-// into OpsToRename.
-void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
- // See if we have a comparison we support
- SmallVector<Value *, 8> CmpOperands;
- SmallVector<Value *, 2> ConditionsToProcess;
- CmpInst::Predicate Pred;
- Value *Operand = II->getOperand(0);
- if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))
- .match(II->getOperand(0))) {
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
- ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
- ConditionsToProcess.push_back(Operand);
- } else if (isa<CmpInst>(Operand)) {
-
- ConditionsToProcess.push_back(Operand);
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands) {
- auto *PA = new PredicateAssume(Op, II, Cmp);
- addInfoFor(OpsToRename, Op, PA);
- }
- CmpOperands.clear();
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // Otherwise, it should be an AND.
- assert(BinOp->getOpcode() == Instruction::And &&
- "Should have been an AND");
- auto *PA = new PredicateAssume(BinOp, II, BinOp);
- addInfoFor(OpsToRename, BinOp, PA);
- } else {
- llvm_unreachable("Unknown type of condition");
- }
- }
-}
-
-// Process a block terminating branch, and place relevant operations to be
-// renamed into OpsToRename.
-void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
- BasicBlock *FirstBB = BI->getSuccessor(0);
- BasicBlock *SecondBB = BI->getSuccessor(1);
- SmallVector<BasicBlock *, 2> SuccsToProcess;
- SuccsToProcess.push_back(FirstBB);
- SuccsToProcess.push_back(SecondBB);
- SmallVector<Value *, 2> ConditionsToProcess;
-
- auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
- for (auto *Succ : SuccsToProcess) {
- // Don't try to insert on a self-edge. This is mainly because we will
- // eliminate during renaming anyway.
- if (Succ == BranchBB)
- continue;
- bool TakenEdge = (Succ == FirstBB);
- // For and, only insert on the true edge
- // For or, only insert on the false edge
- if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
- continue;
- PredicateBase *PB =
- new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
- addInfoFor(OpsToRename, Op, PB);
- if (!Succ->getSinglePredecessor())
- EdgeUsesOnly.insert({BranchBB, Succ});
- }
- };
-
- // Match combinations of conditions.
- CmpInst::Predicate Pred;
- bool isAnd = false;
- bool isOr = false;
- SmallVector<Value *, 8> CmpOperands;
- if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value()))) ||
- match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
- m_Cmp(Pred, m_Value(), m_Value())))) {
- auto *BinOp = cast<BinaryOperator>(BI->getCondition());
- if (BinOp->getOpcode() == Instruction::And)
- isAnd = true;
- else if (BinOp->getOpcode() == Instruction::Or)
- isOr = true;
- ConditionsToProcess.push_back(BinOp->getOperand(0));
- ConditionsToProcess.push_back(BinOp->getOperand(1));
- ConditionsToProcess.push_back(BI->getCondition());
- } else if (isa<CmpInst>(BI->getCondition())) {
- ConditionsToProcess.push_back(BI->getCondition());
- }
- for (auto Cond : ConditionsToProcess) {
- if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
- collectCmpOps(Cmp, CmpOperands);
- // Now add our copy infos for our operands
- for (auto *Op : CmpOperands)
- InsertHelper(Op, isAnd, isOr, Cmp);
- } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
- // This must be an AND or an OR.
- assert((BinOp->getOpcode() == Instruction::And ||
- BinOp->getOpcode() == Instruction::Or) &&
- "Should have been an AND or an OR");
- // The actual value of the binop is not subject to the same restrictions
- // as the comparison. It's either true or false on the true/false branch.
- InsertHelper(BinOp, false, false, BinOp);
- } else {
- llvm_unreachable("Unknown type of condition");
- }
- CmpOperands.clear();
- }
-}
-// Process a block terminating switch, and place relevant operations to be
-// renamed into OpsToRename.
-void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
- SmallPtrSetImpl<Value *> &OpsToRename) {
- Value *Op = SI->getCondition();
- if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
- return;
-
- // Remember how many outgoing edges there are to every successor.
- SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
- for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *TargetBlock = SI->getSuccessor(i);
- ++SwitchEdges[TargetBlock];
- }
-
- // Now propagate info for each case value
- for (auto C : SI->cases()) {
- BasicBlock *TargetBlock = C.getCaseSuccessor();
- if (SwitchEdges.lookup(TargetBlock) == 1) {
- PredicateSwitch *PS = new PredicateSwitch(
- Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI);
- addInfoFor(OpsToRename, Op, PS);
- if (!TargetBlock->getSinglePredecessor())
- EdgeUsesOnly.insert({BranchBB, TargetBlock});
- }
- }
-}
-
-// Build predicate info for our function
-void PredicateInfo::buildPredicateInfo() {
- DT.updateDFSNumbers();
- // Collect operands to rename from all conditional branch terminators, as well
- // as assume statements.
- SmallPtrSet<Value *, 8> OpsToRename;
- for (auto DTN : depth_first(DT.getRootNode())) {
- BasicBlock *BranchBB = DTN->getBlock();
- if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
- if (!BI->isConditional())
- continue;
- // Can't insert conditional information if they all go to the same place.
- if (BI->getSuccessor(0) == BI->getSuccessor(1))
- continue;
- processBranch(BI, BranchBB, OpsToRename);
- } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
- processSwitch(SI, BranchBB, OpsToRename);
- }
- }
- for (auto &Assume : AC.assumptions()) {
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
- if (DT.isReachableFromEntry(II->getParent()))
- processAssume(II, II->getParent(), OpsToRename);
- }
- // Now rename all our operations.
- renameUses(OpsToRename);
-}
-
-// Create a ssa_copy declaration with custom mangling, because
-// Intrinsic::getDeclaration does not handle overloaded unnamed types properly:
-// all unnamed types get mangled to the same string. We use the pointer
-// to the type as name here, as it guarantees unique names for different
-// types and we remove the declarations when destroying PredicateInfo.
-// It is a workaround for PR38117, because solving it in a fully general way is
-// tricky (FIXME).
-static Function *getCopyDeclaration(Module *M, Type *Ty) {
- std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
- return cast<Function>(
- M->getOrInsertFunction(Name,
- getType(M->getContext(), Intrinsic::ssa_copy, Ty))
- .getCallee());
-}
-
-// Given the renaming stack, make all the operands currently on the stack real
-// by inserting them into the IR. Return the last operation's value.
-Value *PredicateInfo::materializeStack(unsigned int &Counter,
- ValueDFSStack &RenameStack,
- Value *OrigOp) {
- // Find the first thing we have to materialize
- auto RevIter = RenameStack.rbegin();
- for (; RevIter != RenameStack.rend(); ++RevIter)
- if (RevIter->Def)
- break;
-
- size_t Start = RevIter - RenameStack.rbegin();
- // The maximum number of things we should be trying to materialize at once
- // right now is 4, depending on if we had an assume, a branch, and both used
- // and of conditions.
- for (auto RenameIter = RenameStack.end() - Start;
- RenameIter != RenameStack.end(); ++RenameIter) {
- auto *Op =
- RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
- ValueDFS &Result = *RenameIter;
- auto *ValInfo = Result.PInfo;
- // For edge predicates, we can just place the operand in the block before
- // the terminator. For assume, we have to place it right before the assume
- // to ensure we dominate all of our uses. Always insert right before the
- // relevant instruction (terminator, assume), so that we insert in proper
- // order in the case of multiple predicateinfo in the same block.
- if (isa<PredicateWithEdge>(ValInfo)) {
- IRBuilder<> B(getBranchTerminator(ValInfo));
- Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
- CreatedDeclarations.insert(IF);
- CallInst *PIC =
- B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
- PredicateMap.insert({PIC, ValInfo});
- Result.Def = PIC;
- } else {
- auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
- assert(PAssume &&
- "Should not have gotten here without it being an assume");
- IRBuilder<> B(PAssume->AssumeInst);
- Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (empty(IF->users()))
- CreatedDeclarations.insert(IF);
- CallInst *PIC = B.CreateCall(IF, Op);
- PredicateMap.insert({PIC, ValInfo});
- Result.Def = PIC;
- }
- }
- return RenameStack.back().Def;
-}
-
-// Instead of the standard SSA renaming algorithm, which is O(Number of
-// instructions), and walks the entire dominator tree, we walk only the defs +
-// uses. The standard SSA renaming algorithm does not really rely on the
-// dominator tree except to order the stack push/pops of the renaming stacks, so
-// that defs end up getting pushed before hitting the correct uses. This does
-// not require the dominator tree, only the *order* of the dominator tree. The
-// complete and correct ordering of the defs and uses, in dominator tree is
-// contained in the DFS numbering of the dominator tree. So we sort the defs and
-// uses into the DFS ordering, and then just use the renaming stack as per
-// normal, pushing when we hit a def (which is a predicateinfo instruction),
-// popping when we are out of the dfs scope for that def, and replacing any uses
-// with top of stack if it exists. In order to handle liveness without
-// propagating liveness info, we don't actually insert the predicateinfo
-// instruction def until we see a use that it would dominate. Once we see such
-// a use, we materialize the predicateinfo instruction in the right place and
-// use it.
-//
-// TODO: Use this algorithm to perform fast single-variable renaming in
-// promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
- // Sort OpsToRename since we are going to iterate it.
- SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
- auto Comparator = [&](const Value *A, const Value *B) {
- return valueComesBefore(OI, A, B);
- };
- llvm::sort(OpsToRename, Comparator);
- ValueDFS_Compare Compare(OI);
- // Compute liveness, and rename in O(uses) per Op.
- for (auto *Op : OpsToRename) {
- LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");
- unsigned Counter = 0;
- SmallVector<ValueDFS, 16> OrderedUses;
- const auto &ValueInfo = getValueInfo(Op);
- // Insert the possible copies into the def/use list.
- // They will become real copies if we find a real use for them, and never
- // created otherwise.
- for (auto &PossibleCopy : ValueInfo.Infos) {
- ValueDFS VD;
- // Determine where we are going to place the copy by the copy type.
- // The predicate info for branches always come first, they will get
- // materialized in the split block at the top of the block.
- // The predicate info for assumes will be somewhere in the middle,
- // it will get materialized in front of the assume.
- if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
- VD.LocalNum = LN_Middle;
- DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
- if (!DomNode)
- continue;
- VD.DFSIn = DomNode->getDFSNumIn();
- VD.DFSOut = DomNode->getDFSNumOut();
- VD.PInfo = PossibleCopy;
- OrderedUses.push_back(VD);
- } else if (isa<PredicateWithEdge>(PossibleCopy)) {
- // If we can only do phi uses, we treat it like it's in the branch
- // block, and handle it specially. We know that it goes last, and only
- // dominate phi uses.
- auto BlockEdge = getBlockEdge(PossibleCopy);
- if (EdgeUsesOnly.count(BlockEdge)) {
- VD.LocalNum = LN_Last;
- auto *DomNode = DT.getNode(BlockEdge.first);
- if (DomNode) {
- VD.DFSIn = DomNode->getDFSNumIn();
- VD.DFSOut = DomNode->getDFSNumOut();
- VD.PInfo = PossibleCopy;
- VD.EdgeOnly = true;
- OrderedUses.push_back(VD);
- }
- } else {
- // Otherwise, we are in the split block (even though we perform
- // insertion in the branch block).
- // Insert a possible copy at the split block and before the branch.
- VD.LocalNum = LN_First;
- auto *DomNode = DT.getNode(BlockEdge.second);
- if (DomNode) {
- VD.DFSIn = DomNode->getDFSNumIn();
- VD.DFSOut = DomNode->getDFSNumOut();
- VD.PInfo = PossibleCopy;
- OrderedUses.push_back(VD);
- }
- }
- }
- }
-
- convertUsesToDFSOrdered(Op, OrderedUses);
- // Here we require a stable sort because we do not bother to try to
- // assign an order to the operands the uses represent. Thus, two
- // uses in the same instruction do not have a strict sort order
- // currently and will be considered equal. We could get rid of the
- // stable sort by creating one if we wanted.
- llvm::stable_sort(OrderedUses, Compare);
- SmallVector<ValueDFS, 8> RenameStack;
- // For each use, sorted into dfs order, push values and replaces uses with
- // top of stack, which will represent the reaching def.
- for (auto &VD : OrderedUses) {
- // We currently do not materialize copy over copy, but we should decide if
- // we want to.
- bool PossibleCopy = VD.PInfo != nullptr;
- if (RenameStack.empty()) {
- LLVM_DEBUG(dbgs() << "Rename Stack is empty\n");
- } else {
- LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
- << RenameStack.back().DFSIn << ","
- << RenameStack.back().DFSOut << ")\n");
- }
-
- LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
- << VD.DFSOut << ")\n");
-
- bool ShouldPush = (VD.Def || PossibleCopy);
- bool OutOfScope = !stackIsInScope(RenameStack, VD);
- if (OutOfScope || ShouldPush) {
- // Sync to our current scope.
- popStackUntilDFSScope(RenameStack, VD);
- if (ShouldPush) {
- RenameStack.push_back(VD);
- }
- }
- // If we get to this point, and the stack is empty we must have a use
- // with no renaming needed, just skip it.
- if (RenameStack.empty())
- continue;
- // Skip values, only want to rename the uses
- if (VD.Def || PossibleCopy)
- continue;
- if (!DebugCounter::shouldExecute(RenameCounter)) {
- LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n");
- continue;
- }
- ValueDFS &Result = RenameStack.back();
-
- // If the possible copy dominates something, materialize our stack up to
- // this point. This ensures every comparison that affects our operation
- // ends up with predicateinfo.
- if (!Result.Def)
- Result.Def = materializeStack(Counter, RenameStack, Op);
-
- LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
- << *VD.U->get() << " in " << *(VD.U->getUser())
- << "\n");
- assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
- "Predicateinfo def should have dominated this use");
- VD.U->set(Result.Def);
- }
- }
-}
-
-PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
- auto OIN = ValueInfoNums.find(Operand);
- if (OIN == ValueInfoNums.end()) {
- // This will grow it
- ValueInfos.resize(ValueInfos.size() + 1);
- // This will use the new size and give us a 0 based number of the info
- auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1});
- assert(InsertResult.second && "Value info number already existed?");
- return ValueInfos[InsertResult.first->second];
- }
- return ValueInfos[OIN->second];
-}
-
-const PredicateInfo::ValueInfo &
-PredicateInfo::getValueInfo(Value *Operand) const {
- auto OINI = ValueInfoNums.lookup(Operand);
- assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
- assert(OINI < ValueInfos.size() &&
- "Value Info Number greater than size of Value Info Table");
- return ValueInfos[OINI];
-}
-
-PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
- AssumptionCache &AC)
- : F(F), DT(DT), AC(AC), OI(&DT) {
- // Push an empty operand info so that we can detect 0 as not finding one
- ValueInfos.resize(1);
- buildPredicateInfo();
-}
-
-// Remove all declarations we created . The PredicateInfo consumers are
-// responsible for remove the ssa_copy calls created.
-PredicateInfo::~PredicateInfo() {
- // Collect function pointers in set first, as SmallSet uses a SmallVector
- // internally and we have to remove the asserting value handles first.
- SmallPtrSet<Function *, 20> FunctionPtrs;
- for (auto &F : CreatedDeclarations)
- FunctionPtrs.insert(&*F);
- CreatedDeclarations.clear();
-
- for (Function *F : FunctionPtrs) {
- assert(F->user_begin() == F->user_end() &&
- "PredicateInfo consumer did not remove all SSA copies.");
- F->eraseFromParent();
- }
-}
-
-void PredicateInfo::verifyPredicateInfo() const {}
-
-char PredicateInfoPrinterLegacyPass::ID = 0;
-
-PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
- : FunctionPass(ID) {
- initializePredicateInfoPrinterLegacyPassPass(
- *PassRegistry::getPassRegistry());
-}
-
-void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
-}
-
-// Replace ssa_copy calls created by PredicateInfo with their operand.
-static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
- for (auto I = inst_begin(F), E = inst_end(F); I != E;) {
- Instruction *Inst = &*I++;
- const auto *PI = PredInfo.getPredicateInfoFor(Inst);
- auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
- continue;
-
- Inst->replaceAllUsesWith(II->getOperand(0));
- Inst->eraseFromParent();
- }
-}
-
-bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
- PredInfo->print(dbgs());
- if (VerifyPredicateInfo)
- PredInfo->verifyPredicateInfo();
-
- replaceCreatedSSACopys(*PredInfo, F);
- return false;
-}
-
-PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- OS << "PredicateInfo for function: " << F.getName() << "\n";
- auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
- PredInfo->print(OS);
-
- replaceCreatedSSACopys(*PredInfo, F);
- return PreservedAnalyses::all();
-}
-
-/// An assembly annotator class to print PredicateInfo information in
-/// comments.
-class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
- friend class PredicateInfo;
- const PredicateInfo *PredInfo;
-
-public:
- PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
-
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {}
-
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) {
- if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
- OS << "; Has predicate info\n";
- if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
- OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
- << " Comparison:" << *PB->Condition << " Edge: [";
- PB->From->printAsOperand(OS);
- OS << ",";
- PB->To->printAsOperand(OS);
- OS << "] }\n";
- } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
- OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
- << " Switch:" << *PS->Switch << " Edge: [";
- PS->From->printAsOperand(OS);
- OS << ",";
- PS->To->printAsOperand(OS);
- OS << "] }\n";
- } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
- OS << "; assume predicate info {"
- << " Comparison:" << *PA->Condition << " }\n";
- }
- }
- }
-};
-
-void PredicateInfo::print(raw_ostream &OS) const {
- PredicateInfoAnnotatedWriter Writer(this);
- F.print(OS, &Writer);
-}
-
-void PredicateInfo::dump() const {
- PredicateInfoAnnotatedWriter Writer(this);
- F.print(dbgs(), &Writer);
-}
-
-PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
-
- return PreservedAnalyses::all();
-}
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
deleted file mode 100644
index d58e1ea574ef..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ /dev/null
@@ -1,1007 +0,0 @@
-//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file promotes memory references to be register references. It promotes
-// alloca instructions which only have loads and stores as uses. An alloca is
-// transformed by using iterated dominator frontiers to place PHI nodes, then
-// traversing the function in depth-first order to rewrite loads and stores as
-// appropriate.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include <algorithm>
-#include <cassert>
-#include <iterator>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "mem2reg"
-
-STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
-STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
-STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
-STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
- // FIXME: If the memory unit is of pointer or integer type, we can permit
- // assignments to subsections of the memory unit.
- unsigned AS = AI->getType()->getAddressSpace();
-
- // Only allow direct and non-volatile loads and stores...
- for (const User *U : AI->users()) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Note that atomic loads can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (LI->isVolatile())
- return false;
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI)
- return false; // Don't allow a store OF the AI, only INTO the AI.
- // Note that atomic stores can be transformed; atomic semantics do
- // not have any meaning for a local alloca.
- if (SI->isVolatile())
- return false;
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (!II->isLifetimeStartOrEnd())
- return false;
- } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
- if (!onlyUsedByLifetimeMarkers(BCI))
- return false;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
- return false;
- if (!GEPI->hasAllZeroIndices())
- return false;
- if (!onlyUsedByLifetimeMarkers(GEPI))
- return false;
- } else {
- return false;
- }
- }
-
- return true;
-}
-
-namespace {
-
-struct AllocaInfo {
- SmallVector<BasicBlock *, 32> DefiningBlocks;
- SmallVector<BasicBlock *, 32> UsingBlocks;
-
- StoreInst *OnlyStore;
- BasicBlock *OnlyBlock;
- bool OnlyUsedInOneBlock;
-
- TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
-
- void clear() {
- DefiningBlocks.clear();
- UsingBlocks.clear();
- OnlyStore = nullptr;
- OnlyBlock = nullptr;
- OnlyUsedInOneBlock = true;
- DbgDeclares.clear();
- }
-
- /// Scan the uses of the specified alloca, filling in the AllocaInfo used
- /// by the rest of the pass to reason about the uses of this alloca.
- void AnalyzeAlloca(AllocaInst *AI) {
- clear();
-
- // As we scan the uses of the alloca instruction, keep track of stores,
- // and decide whether all of the loads and stores to the alloca are within
- // the same basic block.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *User = cast<Instruction>(*UI++);
-
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI->getParent());
- OnlyStore = SI;
- } else {
- LoadInst *LI = cast<LoadInst>(User);
- // Otherwise it must be a load instruction, keep track of variable
- // reads.
- UsingBlocks.push_back(LI->getParent());
- }
-
- if (OnlyUsedInOneBlock) {
- if (!OnlyBlock)
- OnlyBlock = User->getParent();
- else if (OnlyBlock != User->getParent())
- OnlyUsedInOneBlock = false;
- }
- }
-
- DbgDeclares = FindDbgAddrUses(AI);
- }
-};
-
-/// Data package used by RenamePass().
-struct RenamePassData {
- using ValVector = std::vector<Value *>;
- using LocationVector = std::vector<DebugLoc>;
-
- RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
- : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
-
- BasicBlock *BB;
- BasicBlock *Pred;
- ValVector Values;
- LocationVector Locations;
-};
-
-/// This assigns and keeps a per-bb relative ordering of load/store
-/// instructions in the block that directly load or store an alloca.
-///
-/// This functionality is important because it avoids scanning large basic
-/// blocks multiple times when promoting many allocas in the same block.
-class LargeBlockInfo {
- /// For each instruction that we track, keep the index of the
- /// instruction.
- ///
- /// The index starts out as the number of the instruction from the start of
- /// the block.
- DenseMap<const Instruction *, unsigned> InstNumbers;
-
-public:
-
- /// This code only looks at accesses to allocas.
- static bool isInterestingInstruction(const Instruction *I) {
- return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
- (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
- }
-
- /// Get or calculate the index of the specified instruction.
- unsigned getInstructionIndex(const Instruction *I) {
- assert(isInterestingInstruction(I) &&
- "Not a load/store to/from an alloca?");
-
- // If we already have this instruction number, return it.
- DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
- if (It != InstNumbers.end())
- return It->second;
-
- // Scan the whole block to get the instruction. This accumulates
- // information for every interesting instruction in the block, in order to
- // avoid gratuitus rescans.
- const BasicBlock *BB = I->getParent();
- unsigned InstNo = 0;
- for (const Instruction &BBI : *BB)
- if (isInterestingInstruction(&BBI))
- InstNumbers[&BBI] = InstNo++;
- It = InstNumbers.find(I);
-
- assert(It != InstNumbers.end() && "Didn't insert instruction?");
- return It->second;
- }
-
- void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
-
- void clear() { InstNumbers.clear(); }
-};
-
-struct PromoteMem2Reg {
- /// The alloca instructions being promoted.
- std::vector<AllocaInst *> Allocas;
-
- DominatorTree &DT;
- DIBuilder DIB;
-
- /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
- AssumptionCache *AC;
-
- const SimplifyQuery SQ;
-
- /// Reverse mapping of Allocas.
- DenseMap<AllocaInst *, unsigned> AllocaLookup;
-
- /// The PhiNodes we're adding.
- ///
- /// That map is used to simplify some Phi nodes as we iterate over it, so
- /// it should have deterministic iterators. We could use a MapVector, but
- /// since we already maintain a map from BasicBlock* to a stable numbering
- /// (BBNumbers), the DenseMap is more efficient (also supports removal).
- DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;
-
- /// For each PHI node, keep track of which entry in Allocas it corresponds
- /// to.
- DenseMap<PHINode *, unsigned> PhiToAllocaMap;
-
- /// For each alloca, we keep track of the dbg.declare intrinsic that
- /// describes it, if any, so that we can convert it to a dbg.value
- /// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
-
- /// The set of basic blocks the renamer has already visited.
- SmallPtrSet<BasicBlock *, 16> Visited;
-
- /// Contains a stable numbering of basic blocks to avoid non-determinstic
- /// behavior.
- DenseMap<BasicBlock *, unsigned> BBNumbers;
-
- /// Lazily compute the number of predecessors a block has.
- DenseMap<const BasicBlock *, unsigned> BBNumPreds;
-
-public:
- PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AssumptionCache *AC)
- : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
- AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
- nullptr, &DT, AC) {}
-
- void run();
-
-private:
- void RemoveFromAllocasList(unsigned &AllocaIdx) {
- Allocas[AllocaIdx] = Allocas.back();
- Allocas.pop_back();
- --AllocaIdx;
- }
-
- unsigned getNumPreds(const BasicBlock *BB) {
- unsigned &NP = BBNumPreds[BB];
- if (NP == 0)
- NP = pred_size(BB) + 1;
- return NP - 1;
- }
-
- void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
- SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
- void RenamePass(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector &IncVals,
- RenamePassData::LocationVector &IncLocs,
- std::vector<RenamePassData> &Worklist);
- bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
-};
-
-} // end anonymous namespace
-
-/// Given a LoadInst LI this adds assume(LI != null) after it.
-static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
- Function *AssumeIntrinsic =
- Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
- ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
- Constant::getNullValue(LI->getType()));
- LoadNotNull->insertAfter(LI);
- CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
- CI->insertAfter(LoadNotNull);
- AC->registerAssumption(CI);
-}
-
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
- // Knowing that this alloca is promotable, we know that it's safe to kill all
- // instructions except for load and store.
-
- for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
- Instruction *I = cast<Instruction>(*UI);
- ++UI;
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- continue;
-
- if (!I->getType()->isVoidTy()) {
- // The only users of this bitcast/GEP instruction are lifetime intrinsics.
- // Follow the use/def chain to erase them now instead of leaving it for
- // dead code elimination later.
- for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
- Instruction *Inst = cast<Instruction>(*UUI);
- ++UUI;
- Inst->eraseFromParent();
- }
- }
- I->eraseFromParent();
- }
-}
-
-/// Rewrite as many loads as possible given a single store.
-///
-/// When there is only a single store, we can use the domtree to trivially
-/// replace all of the dominated loads with the stored value. Do so, and return
-/// true if this has successfully promoted the alloca entirely. If this returns
-/// false there were some loads which were not dominated by the single store
-/// and thus must be phi-ed with undef. We fall back to the standard alloca
-/// promotion algorithm in that case.
-static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI, const DataLayout &DL,
- DominatorTree &DT, AssumptionCache *AC) {
- StoreInst *OnlyStore = Info.OnlyStore;
- bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
- BasicBlock *StoreBB = OnlyStore->getParent();
- int StoreIndex = -1;
-
- // Clear out UsingBlocks. We will reconstruct it here if needed.
- Info.UsingBlocks.clear();
-
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- Instruction *UserInst = cast<Instruction>(*UI++);
- if (UserInst == OnlyStore)
- continue;
- LoadInst *LI = cast<LoadInst>(UserInst);
-
- // Okay, if we have a load from the alloca, we want to replace it with the
- // only value stored to the alloca. We can do this if the value is
- // dominated by the store. If not, we use the rest of the mem2reg machinery
- // to insert the phi nodes as needed.
- if (!StoringGlobalVal) { // Non-instructions are always dominated.
- if (LI->getParent() == StoreBB) {
- // If we have a use that is in the same block as the store, compare the
- // indices of the two instructions to see which one came first. If the
- // load came before the store, we can't handle it.
- if (StoreIndex == -1)
- StoreIndex = LBI.getInstructionIndex(OnlyStore);
-
- if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
- // Can't handle this load, bail out.
- Info.UsingBlocks.push_back(StoreBB);
- continue;
- }
- } else if (!DT.dominates(StoreBB, LI->getParent())) {
- // If the load and store are in different blocks, use BB dominance to
- // check their relationships. If the store doesn't dom the use, bail
- // out.
- Info.UsingBlocks.push_back(LI->getParent());
- continue;
- }
- }
-
- // Otherwise, we *can* safely rewrite this load.
- Value *ReplVal = OnlyStore->getOperand(0);
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
-
- // If the load was marked as nonnull we don't want to lose
- // that information when we erase this Load. So we preserve
- // it with an assume.
- if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
- addAssumeNonNull(AC, LI);
-
- LI->replaceAllUsesWith(ReplVal);
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-
- // Finally, after the scan, check to see if the store is all that is left.
- if (!Info.UsingBlocks.empty())
- return false; // If not, we'll have to fall back for the remainder.
-
- // Record debuginfo for the store and remove the declaration's
- // debuginfo.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
- DII->eraseFromParent();
- }
- // Remove the (now dead) store and alloca.
- Info.OnlyStore->eraseFromParent();
- LBI.deleteValue(Info.OnlyStore);
-
- AI->eraseFromParent();
- return true;
-}
-
-/// Many allocas are only used within a single basic block. If this is the
-/// case, avoid traversing the CFG and inserting a lot of potentially useless
-/// PHI nodes by just performing a single linear pass over the basic block
-/// using the Alloca.
-///
-/// If we cannot promote this alloca (because it is read before it is written),
-/// return false. This is necessary in cases where, due to control flow, the
-/// alloca is undefined only on some control flow paths. e.g. code like
-/// this is correct in LLVM IR:
-/// // A is an alloca with no stores so far
-/// for (...) {
-/// int t = *A;
-/// if (!first_iteration)
-/// use(t);
-/// *A = 42;
-/// }
-static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
- LargeBlockInfo &LBI,
- const DataLayout &DL,
- DominatorTree &DT,
- AssumptionCache *AC) {
- // The trickiest case to handle is when we have large blocks. Because of this,
- // this code is optimized assuming that large blocks happen. This does not
- // significantly pessimize the small block case. This uses LargeBlockInfo to
- // make it efficient to get the index of various operations in the block.
-
- // Walk the use-def list of the alloca, getting the locations of all stores.
- using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
- StoresByIndexTy StoresByIndex;
-
- for (User *U : AI->users())
- if (StoreInst *SI = dyn_cast<StoreInst>(U))
- StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
-
- // Sort the stores by their index, making it efficient to do a lookup with a
- // binary search.
- llvm::sort(StoresByIndex, less_first());
-
- // Walk all of the loads from this alloca, replacing them with the nearest
- // store above them, if any.
- for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI++);
- if (!LI)
- continue;
-
- unsigned LoadIdx = LBI.getInstructionIndex(LI);
-
- // Find the nearest store that has a lower index than this load.
- StoresByIndexTy::iterator I = llvm::lower_bound(
- StoresByIndex,
- std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
- less_first());
- if (I == StoresByIndex.begin()) {
- if (StoresByIndex.empty())
- // If there are no stores, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
- else
- // There is no store before this load, bail out (load may be affected
- // by the following stores - see main comment).
- return false;
- } else {
- // Otherwise, there was a store before this load, the load takes its value.
- // Note, if the load was marked as nonnull we don't want to lose that
- // information when we erase it. So we preserve it with an assume.
- Value *ReplVal = std::prev(I)->second->getOperand(0);
- if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
- addAssumeNonNull(AC, LI);
-
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
-
- LI->replaceAllUsesWith(ReplVal);
- }
-
- LI->eraseFromParent();
- LBI.deleteValue(LI);
- }
-
- // Remove the (now dead) stores and alloca.
- while (!AI->use_empty()) {
- StoreInst *SI = cast<StoreInst>(AI->user_back());
- // Record debuginfo for the store before removing it.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
- DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
- }
- SI->eraseFromParent();
- LBI.deleteValue(SI);
- }
-
- AI->eraseFromParent();
-
- // The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
- DII->eraseFromParent();
-
- ++NumLocalPromoted;
- return true;
-}
-
-void PromoteMem2Reg::run() {
- Function &F = *DT.getRoot()->getParent();
-
- AllocaDbgDeclares.resize(Allocas.size());
-
- AllocaInfo Info;
- LargeBlockInfo LBI;
- ForwardIDFCalculator IDF(DT);
-
- for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
- AllocaInst *AI = Allocas[AllocaNum];
-
- assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
- assert(AI->getParent()->getParent() == &F &&
- "All allocas should be in the same function, which is same as DF!");
-
- removeLifetimeIntrinsicUsers(AI);
-
- if (AI->use_empty()) {
- // If there are no uses of the alloca, just delete it now.
- AI->eraseFromParent();
-
- // Remove the alloca from the Allocas list, since it has been processed
- RemoveFromAllocasList(AllocaNum);
- ++NumDeadAlloca;
- continue;
- }
-
- // Calculate the set of read and write-locations for each alloca. This is
- // analogous to finding the 'uses' and 'definitions' of each variable.
- Info.AnalyzeAlloca(AI);
-
- // If there is only a single store to this value, replace any loads of
- // it that are directly dominated by the definition with the value stored.
- if (Info.DefiningBlocks.size() == 1) {
- if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
- // The alloca has been processed, move on.
- RemoveFromAllocasList(AllocaNum);
- ++NumSingleStore;
- continue;
- }
- }
-
- // If the alloca is only read and written in one basic block, just perform a
- // linear sweep over the block to eliminate it.
- if (Info.OnlyUsedInOneBlock &&
- promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
- // The alloca has been processed, move on.
- RemoveFromAllocasList(AllocaNum);
- continue;
- }
-
- // If we haven't computed a numbering for the BB's in the function, do so
- // now.
- if (BBNumbers.empty()) {
- unsigned ID = 0;
- for (auto &BB : F)
- BBNumbers[&BB] = ID++;
- }
-
- // Remember the dbg.declare intrinsic describing this alloca, if any.
- if (!Info.DbgDeclares.empty())
- AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares;
-
- // Keep the reverse mapping of the 'Allocas' array for the rename pass.
- AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
-
- // At this point, we're committed to promoting the alloca using IDF's, and
- // the standard SSA construction algorithm. Determine which blocks need PHI
- // nodes and see if we can optimize out some work by avoiding insertion of
- // dead phi nodes.
-
- // Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
- Info.DefiningBlocks.end());
-
- // Determine which blocks the value is live in. These are blocks which lead
- // to uses.
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
-
- // At this point, we're committed to promoting the alloca using IDF's, and
- // the standard SSA construction algorithm. Determine which blocks need phi
- // nodes and see if we can optimize out some work by avoiding insertion of
- // dead phi nodes.
- IDF.setLiveInBlocks(LiveInBlocks);
- IDF.setDefiningBlocks(DefBlocks);
- SmallVector<BasicBlock *, 32> PHIBlocks;
- IDF.calculate(PHIBlocks);
- llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
- });
-
- unsigned CurrentVersion = 0;
- for (BasicBlock *BB : PHIBlocks)
- QueuePhiNode(BB, AllocaNum, CurrentVersion);
- }
-
- if (Allocas.empty())
- return; // All of the allocas must have been trivial!
-
- LBI.clear();
-
- // Set the incoming values for the basic block to be null values for all of
- // the alloca's. We do this in case there is a load of a value that has not
- // been stored yet. In this case, it will get this null value.
- RenamePassData::ValVector Values(Allocas.size());
- for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
- Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
-
- // When handling debug info, treat all incoming values as if they have unknown
- // locations until proven otherwise.
- RenamePassData::LocationVector Locations(Allocas.size());
-
- // Walks all basic blocks in the function performing the SSA rename algorithm
- // and inserting the phi nodes we marked as necessary
- std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
- std::move(Locations));
- do {
- RenamePassData RPD = std::move(RenamePassWorkList.back());
- RenamePassWorkList.pop_back();
- // RenamePass may add new worklist entries.
- RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
- } while (!RenamePassWorkList.empty());
-
- // The renamer uses the Visited set to avoid infinite loops. Clear it now.
- Visited.clear();
-
- // Remove the allocas themselves from the function.
- for (Instruction *A : Allocas) {
- // If there are any uses of the alloca instructions left, they must be in
- // unreachable basic blocks that were not processed by walking the dominator
- // tree. Just delete the users now.
- if (!A->use_empty())
- A->replaceAllUsesWith(UndefValue::get(A->getType()));
- A->eraseFromParent();
- }
-
- // Remove alloca's dbg.declare instrinsics from the function.
- for (auto &Declares : AllocaDbgDeclares)
- for (auto *DII : Declares)
- DII->eraseFromParent();
-
- // Loop over all of the PHI nodes and see if there are any that we can get
- // rid of because they merge all of the same incoming values. This can
- // happen due to undef values coming into the PHI nodes. This process is
- // iterative, because eliminating one PHI node can cause others to be removed.
- bool EliminatedAPHI = true;
- while (EliminatedAPHI) {
- EliminatedAPHI = false;
-
- // Iterating over NewPhiNodes is deterministic, so it is safe to try to
- // simplify and RAUW them as we go. If it was not, we could add uses to
- // the values we replace with in a non-deterministic order, thus creating
- // non-deterministic def->use chains.
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
- I = NewPhiNodes.begin(),
- E = NewPhiNodes.end();
- I != E;) {
- PHINode *PN = I->second;
-
- // If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, SQ)) {
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- NewPhiNodes.erase(I++);
- EliminatedAPHI = true;
- continue;
- }
- ++I;
- }
- }
-
- // At this point, the renamer has added entries to PHI nodes for all reachable
- // code. Unfortunately, there may be unreachable blocks which the renamer
- // hasn't traversed. If this is the case, the PHI nodes may not
- // have incoming values for all predecessors. Loop over all PHI nodes we have
- // created, inserting undef values if they are missing any incoming values.
- for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
- I = NewPhiNodes.begin(),
- E = NewPhiNodes.end();
- I != E; ++I) {
- // We want to do this once per basic block. As such, only process a block
- // when we find the PHI that is the first entry in the block.
- PHINode *SomePHI = I->second;
- BasicBlock *BB = SomePHI->getParent();
- if (&BB->front() != SomePHI)
- continue;
-
- // Only do work here if there the PHI nodes are missing incoming values. We
- // know that all PHI nodes that were inserted in a block will have the same
- // number of incoming values, so we can just check any of them.
- if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
- continue;
-
- // Get the preds for BB.
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
-
- // Ok, now we know that all of the PHI nodes are missing entries for some
- // basic blocks. Start by sorting the incoming predecessors for efficient
- // access.
- auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
- };
- llvm::sort(Preds, CompareBBNumbers);
-
- // Now we loop through all BB's which have entries in SomePHI and remove
- // them from the Preds list.
- for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
- // Do a log(n) search of the Preds list for the entry we want.
- SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
- Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
- assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
- "PHI node has entry for a block which is not a predecessor!");
-
- // Remove the entry
- Preds.erase(EntIt);
- }
-
- // At this point, the blocks left in the preds list must have dummy
- // entries inserted into every PHI nodes for the block. Update all the phi
- // nodes in this block that we are inserting (there could be phis before
- // mem2reg runs).
- unsigned NumBadPreds = SomePHI->getNumIncomingValues();
- BasicBlock::iterator BBI = BB->begin();
- while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
- SomePHI->getNumIncomingValues() == NumBadPreds) {
- Value *UndefVal = UndefValue::get(SomePHI->getType());
- for (BasicBlock *Pred : Preds)
- SomePHI->addIncoming(UndefVal, Pred);
- }
- }
-
- NewPhiNodes.clear();
-}
-
-/// Determine which blocks the value is live in.
-///
-/// These are blocks which lead to uses. Knowing this allows us to avoid
-/// inserting PHI nodes into blocks which don't lead to uses (thus, the
-/// inserted phi nodes would be dead).
-void PromoteMem2Reg::ComputeLiveInBlocks(
- AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
- SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
- // To determine liveness, we must iterate through the predecessors of blocks
- // where the def is live. Blocks are added to the worklist if we need to
- // check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
- Info.UsingBlocks.end());
-
- // If any of the using blocks is also a definition block, check to see if the
- // definition occurs before or after the use. If it happens before the use,
- // the value isn't really live-in.
- for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
- BasicBlock *BB = LiveInBlockWorklist[i];
- if (!DefBlocks.count(BB))
- continue;
-
- // Okay, this is a block that both uses and defines the value. If the first
- // reference to the alloca is a def (store), then we know it isn't live-in.
- for (BasicBlock::iterator I = BB->begin();; ++I) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (SI->getOperand(1) != AI)
- continue;
-
- // We found a store to the alloca before a load. The alloca is not
- // actually live-in here.
- LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
- LiveInBlockWorklist.pop_back();
- --i;
- --e;
- break;
- }
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- // Okay, we found a load before a store to the alloca. It is actually
- // live into this block.
- if (LI->getOperand(0) == AI)
- break;
- }
- }
-
- // Now that we have a set of blocks where the phi is live-in, recursively add
- // their predecessors until we find the full region the value is live.
- while (!LiveInBlockWorklist.empty()) {
- BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
- // The block really is live in here, insert it into the set. If already in
- // the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB).second)
- continue;
-
- // Since the value is live into BB, it is either defined in a predecessor or
- // live into it to. Add the preds to the worklist unless they are a
- // defining block.
- for (BasicBlock *P : predecessors(BB)) {
- // The value is not live into a predecessor if it defines the value.
- if (DefBlocks.count(P))
- continue;
-
- // Otherwise it is, add to the worklist.
- LiveInBlockWorklist.push_back(P);
- }
- }
-}
-
-/// Queue a phi-node to be added to a basic-block for a specific Alloca.
-///
-/// Returns true if there wasn't already a phi-node for that variable
-bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
- unsigned &Version) {
- // Look up the basic-block in question.
- PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
-
- // If the BB already has a phi node added for the i'th alloca then we're done!
- if (PN)
- return false;
-
- // Create a PhiNode using the dereferenced type... and add the phi-node to the
- // BasicBlock.
- PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- &BB->front());
- ++NumPHIInsert;
- PhiToAllocaMap[PN] = AllocaNo;
- return true;
-}
-
-/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
-/// create a merged location incorporating \p DL, or to set \p DL directly.
-static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
- bool ApplyMergedLoc) {
- if (ApplyMergedLoc)
- PN->applyMergedLocation(PN->getDebugLoc(), DL);
- else
- PN->setDebugLoc(DL);
-}
-
-/// Recursively traverse the CFG of the function, renaming loads and
-/// stores to the allocas which we are promoting.
-///
-/// IncomingVals indicates what value each Alloca contains on exit from the
-/// predecessor block Pred.
-void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector &IncomingVals,
- RenamePassData::LocationVector &IncomingLocs,
- std::vector<RenamePassData> &Worklist) {
-NextIteration:
- // If we are inserting any phi nodes into this BB, they will already be in the
- // block.
- if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
- // If we have PHI nodes to update, compute the number of edges from Pred to
- // BB.
- if (PhiToAllocaMap.count(APN)) {
- // We want to be able to distinguish between PHI nodes being inserted by
- // this invocation of mem2reg from those phi nodes that already existed in
- // the IR before mem2reg was run. We determine that APN is being inserted
- // because it is missing incoming edges. All other PHI nodes being
- // inserted by this pass of mem2reg will have the same number of incoming
- // operands so far. Remember this count.
- unsigned NewPHINumOperands = APN->getNumOperands();
-
- unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
- assert(NumEdges && "Must be at least one edge from Pred to BB!");
-
- // Add entries for all the phis.
- BasicBlock::iterator PNI = BB->begin();
- do {
- unsigned AllocaNo = PhiToAllocaMap[APN];
-
- // Update the location of the phi node.
- updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
- APN->getNumIncomingValues() > 0);
-
- // Add N incoming values to the PHI node.
- for (unsigned i = 0; i != NumEdges; ++i)
- APN->addIncoming(IncomingVals[AllocaNo], Pred);
-
- // The currently active variable for this block is now the PHI.
- IncomingVals[AllocaNo] = APN;
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
- ConvertDebugDeclareToDebugValue(DII, APN, DIB);
-
- // Get the next phi node.
- ++PNI;
- APN = dyn_cast<PHINode>(PNI);
- if (!APN)
- break;
-
- // Verify that it is missing entries. If not, it is not being inserted
- // by this mem2reg invocation so we want to ignore it.
- } while (APN->getNumOperands() == NewPHINumOperands);
- }
- }
-
- // Don't revisit blocks.
- if (!Visited.insert(BB).second)
- return;
-
- for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
- Instruction *I = &*II++; // get the instruction, increment iterator
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
- if (!Src)
- continue;
-
- DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
- if (AI == AllocaLookup.end())
- continue;
-
- Value *V = IncomingVals[AI->second];
-
- // If the load was marked as nonnull we don't want to lose
- // that information when we erase this Load. So we preserve
- // it with an assume.
- if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
- addAssumeNonNull(AC, LI);
-
- // Anything using the load now uses the current value.
- LI->replaceAllUsesWith(V);
- BB->getInstList().erase(LI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Delete this instruction and mark the name as the current holder of the
- // value
- AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
- if (!Dest)
- continue;
-
- DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
- if (ai == AllocaLookup.end())
- continue;
-
- // what value were we writing?
- unsigned AllocaNo = ai->second;
- IncomingVals[AllocaNo] = SI->getOperand(0);
-
- // Record debuginfo for the store before removing it.
- IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
- ConvertDebugDeclareToDebugValue(DII, SI, DIB);
- BB->getInstList().erase(SI);
- }
- }
-
- // 'Recurse' to our successors.
- succ_iterator I = succ_begin(BB), E = succ_end(BB);
- if (I == E)
- return;
-
- // Keep track of the successors so we don't visit the same successor twice
- SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
-
- // Handle the first successor without using the worklist.
- VisitedSuccs.insert(*I);
- Pred = BB;
- BB = *I;
- ++I;
-
- for (; I != E; ++I)
- if (VisitedSuccs.insert(*I).second)
- Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);
-
- goto NextIteration;
-}
-
-void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AssumptionCache *AC) {
- // If there is nothing to do, bail out...
- if (Allocas.empty())
- return;
-
- PromoteMem2Reg(Allocas, DT, AC).run();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
deleted file mode 100644
index bffdd115d940..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ /dev/null
@@ -1,495 +0,0 @@
-//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SSAUpdater class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
-#include <cassert>
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "ssaupdater"
-
-using AvailableValsTy = DenseMap<BasicBlock *, Value *>;
-
-static AvailableValsTy &getAvailableVals(void *AV) {
- return *static_cast<AvailableValsTy*>(AV);
-}
-
-SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI)
- : InsertedPHIs(NewPHI) {}
-
-SSAUpdater::~SSAUpdater() {
- delete static_cast<AvailableValsTy*>(AV);
-}
-
-void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
- if (!AV)
- AV = new AvailableValsTy();
- else
- getAvailableVals(AV).clear();
- ProtoType = Ty;
- ProtoName = Name;
-}
-
-bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
- return getAvailableVals(AV).count(BB);
-}
-
-Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const {
- AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB);
- return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr;
-}
-
-void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(ProtoType && "Need to initialize SSAUpdater");
- assert(ProtoType == V->getType() &&
- "All rewritten values must have the same type");
- getAvailableVals(AV)[BB] = V;
-}
-
-static bool IsEquivalentPHI(PHINode *PHI,
- SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) {
- unsigned PHINumValues = PHI->getNumIncomingValues();
- if (PHINumValues != ValueMapping.size())
- return false;
-
- // Scan the phi to see if it matches.
- for (unsigned i = 0, e = PHINumValues; i != e; ++i)
- if (ValueMapping[PHI->getIncomingBlock(i)] !=
- PHI->getIncomingValue(i)) {
- return false;
- }
-
- return true;
-}
-
-Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
- Value *Res = GetValueAtEndOfBlockInternal(BB);
- return Res;
-}
-
-Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
- // If there is no definition of the renamed variable in this block, just use
- // GetValueAtEndOfBlock to do our work.
- if (!HasValueForBlock(BB))
- return GetValueAtEndOfBlock(BB);
-
- // Otherwise, we have the hard case. Get the live-in values for each
- // predecessor.
- SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues;
- Value *SingularValue = nullptr;
-
- // We can get our predecessor info by walking the pred_iterator list, but it
- // is relatively slow. If we already have PHI nodes in this block, walk one
- // of them to get the predecessor list instead.
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
- Value *PredVal = GetValueAtEndOfBlock(PredBB);
- PredValues.push_back(std::make_pair(PredBB, PredVal));
-
- // Compute SingularValue.
- if (i == 0)
- SingularValue = PredVal;
- else if (PredVal != SingularValue)
- SingularValue = nullptr;
- }
- } else {
- bool isFirstPred = true;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBB = *PI;
- Value *PredVal = GetValueAtEndOfBlock(PredBB);
- PredValues.push_back(std::make_pair(PredBB, PredVal));
-
- // Compute SingularValue.
- if (isFirstPred) {
- SingularValue = PredVal;
- isFirstPred = false;
- } else if (PredVal != SingularValue)
- SingularValue = nullptr;
- }
- }
-
- // If there are no predecessors, just return undef.
- if (PredValues.empty())
- return UndefValue::get(ProtoType);
-
- // Otherwise, if all the merged values are the same, just use it.
- if (SingularValue)
- return SingularValue;
-
- // Otherwise, we do need a PHI: check to see if we already have one available
- // in this block that produces the right value.
- if (isa<PHINode>(BB->begin())) {
- SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),
- PredValues.end());
- for (PHINode &SomePHI : BB->phis()) {
- if (IsEquivalentPHI(&SomePHI, ValueMapping))
- return &SomePHI;
- }
- }
-
- // Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
- ProtoName, &BB->front());
-
- // Fill in all the predecessors of the PHI.
- for (const auto &PredValue : PredValues)
- InsertedPHI->addIncoming(PredValue.second, PredValue.first);
-
- // See if the PHI node can be merged to a single value. This can happen in
- // loop cases when we get a PHI of itself and one other value.
- if (Value *V =
- SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
- InsertedPHI->eraseFromParent();
- return V;
- }
-
- // Set the DebugLoc of the inserted PHI, if available.
- DebugLoc DL;
- if (const Instruction *I = BB->getFirstNonPHI())
- DL = I->getDebugLoc();
- InsertedPHI->setDebugLoc(DL);
-
- // If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
-
- LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
- return InsertedPHI;
-}
-
-void SSAUpdater::RewriteUse(Use &U) {
- Instruction *User = cast<Instruction>(U.getUser());
-
- Value *V;
- if (PHINode *UserPN = dyn_cast<PHINode>(User))
- V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
- else
- V = GetValueInMiddleOfBlock(User->getParent());
-
- // Notify that users of the existing value that it is being replaced.
- Value *OldVal = U.get();
- if (OldVal != V && OldVal->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(OldVal, V);
-
- U.set(V);
-}
-
-void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
- Instruction *User = cast<Instruction>(U.getUser());
-
- Value *V;
- if (PHINode *UserPN = dyn_cast<PHINode>(User))
- V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
- else
- V = GetValueAtEndOfBlock(User->getParent());
-
- U.set(V);
-}
-
-namespace llvm {
-
-template<>
-class SSAUpdaterTraits<SSAUpdater> {
-public:
- using BlkT = BasicBlock;
- using ValT = Value *;
- using PhiT = PHINode;
- using BlkSucc_iterator = succ_iterator;
-
- static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
- static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
-
- class PHI_iterator {
- private:
- PHINode *PHI;
- unsigned idx;
-
- public:
- explicit PHI_iterator(PHINode *P) // begin iterator
- : PHI(P), idx(0) {}
- PHI_iterator(PHINode *P, bool) // end iterator
- : PHI(P), idx(PHI->getNumIncomingValues()) {}
-
- PHI_iterator &operator++() { ++idx; return *this; }
- bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
- bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
-
- Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
- BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
- };
-
- static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
- static PHI_iterator PHI_end(PhiT *PHI) {
- return PHI_iterator(PHI, true);
- }
-
- /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
- /// vector, set Info->NumPreds, and allocate space in Info->Preds.
- static void FindPredecessorBlocks(BasicBlock *BB,
- SmallVectorImpl<BasicBlock *> *Preds) {
- // We can get our predecessor info by walking the pred_iterator list,
- // but it is relatively slow. If we already have PHI nodes in this
- // block, walk one of them to get the predecessor list instead.
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- Preds->append(SomePhi->block_begin(), SomePhi->block_end());
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Preds->push_back(*PI);
- }
- }
-
- /// GetUndefVal - Get an undefined value of the same type as the value
- /// being handled.
- static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
- return UndefValue::get(Updater->ProtoType);
- }
-
- /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
- /// Reserve space for the operands but do not fill them in yet.
- static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
- SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
- Updater->ProtoName, &BB->front());
- return PHI;
- }
-
- /// AddPHIOperand - Add the specified value as an operand of the PHI for
- /// the specified predecessor block.
- static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
- PHI->addIncoming(Val, Pred);
- }
-
- /// InstrIsPHI - Check if an instruction is a PHI.
- ///
- static PHINode *InstrIsPHI(Instruction *I) {
- return dyn_cast<PHINode>(I);
- }
-
- /// ValueIsPHI - Check if a value is a PHI.
- static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
- return dyn_cast<PHINode>(Val);
- }
-
- /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
- /// operands, i.e., it was just added.
- static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
- PHINode *PHI = ValueIsPHI(Val, Updater);
- if (PHI && PHI->getNumIncomingValues() == 0)
- return PHI;
- return nullptr;
- }
-
- /// GetPHIValue - For the specified PHI instruction, return the value
- /// that it defines.
- static Value *GetPHIValue(PHINode *PHI) {
- return PHI;
- }
-};
-
-} // end namespace llvm
-
-/// Check to see if AvailableVals has an entry for the specified BB and if so,
-/// return it. If not, construct SSA form by first calculating the required
-/// placement of PHIs and then inserting new PHIs where needed.
-Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (Value *V = AvailableVals[BB])
- return V;
-
- SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
- return Impl.GetValue(BB);
-}
-
-//===----------------------------------------------------------------------===//
-// LoadAndStorePromoter Implementation
-//===----------------------------------------------------------------------===//
-
-LoadAndStorePromoter::
-LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
- SSAUpdater &S, StringRef BaseName) : SSA(S) {
- if (Insts.empty()) return;
-
- const Value *SomeVal;
- if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
- SomeVal = LI;
- else
- SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
-
- if (BaseName.empty())
- BaseName = SomeVal->getName();
- SSA.Initialize(SomeVal->getType(), BaseName);
-}
-
-void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
- // First step: bucket up uses of the alloca by the block they occur in.
- // This is important because we have to handle multiple defs/uses in a block
- // ourselves: SSAUpdater is purely for cross-block references.
- DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock;
-
- for (Instruction *User : Insts)
- UsesByBlock[User->getParent()].push_back(User);
-
- // Okay, now we can iterate over all the blocks in the function with uses,
- // processing them. Keep track of which loads are loading a live-in value.
- // Walk the uses in the use-list order to be determinstic.
- SmallVector<LoadInst *, 32> LiveInLoads;
- DenseMap<Value *, Value *> ReplacedLoads;
-
- for (Instruction *User : Insts) {
- BasicBlock *BB = User->getParent();
- TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
-
- // If this block has already been processed, ignore this repeat use.
- if (BlockUses.empty()) continue;
-
- // Okay, this is the first use in the block. If this block just has a
- // single user in it, we can rewrite it trivially.
- if (BlockUses.size() == 1) {
- // If it is a store, it is a trivial def of the value in the block.
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- updateDebugInfo(SI);
- SSA.AddAvailableValue(BB, SI->getOperand(0));
- } else
- // Otherwise it is a load, queue it to rewrite as a live-in load.
- LiveInLoads.push_back(cast<LoadInst>(User));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, check to see if this block is all loads.
- bool HasStore = false;
- for (Instruction *I : BlockUses) {
- if (isa<StoreInst>(I)) {
- HasStore = true;
- break;
- }
- }
-
- // If so, we can queue them all as live in loads. We don't have an
- // efficient way to tell which on is first in the block and don't want to
- // scan large blocks, so just add all loads as live ins.
- if (!HasStore) {
- for (Instruction *I : BlockUses)
- LiveInLoads.push_back(cast<LoadInst>(I));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, we have mixed loads and stores (or just a bunch of stores).
- // Since SSAUpdater is purely for cross-block values, we need to determine
- // the order of these instructions in the block. If the first use in the
- // block is a load, then it uses the live in value. The last store defines
- // the live out value. We handle this by doing a linear scan of the block.
- Value *StoredValue = nullptr;
- for (Instruction &I : *BB) {
- if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
- // If this is a load from an unrelated pointer, ignore it.
- if (!isInstInList(L, Insts)) continue;
-
- // If we haven't seen a store yet, this is a live in use, otherwise
- // use the stored value.
- if (StoredValue) {
- replaceLoadWithValue(L, StoredValue);
- L->replaceAllUsesWith(StoredValue);
- ReplacedLoads[L] = StoredValue;
- } else {
- LiveInLoads.push_back(L);
- }
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- // If this is a store to an unrelated pointer, ignore it.
- if (!isInstInList(SI, Insts)) continue;
- updateDebugInfo(SI);
-
- // Remember that this is the active value in the block.
- StoredValue = SI->getOperand(0);
- }
- }
-
- // The last stored value that happened is the live-out for the block.
- assert(StoredValue && "Already checked that there is a store in block");
- SSA.AddAvailableValue(BB, StoredValue);
- BlockUses.clear();
- }
-
- // Okay, now we rewrite all loads that use live-in values in the loop,
- // inserting PHI nodes as necessary.
- for (LoadInst *ALoad : LiveInLoads) {
- Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
- replaceLoadWithValue(ALoad, NewVal);
-
- // Avoid assertions in unreachable code.
- if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
- ALoad->replaceAllUsesWith(NewVal);
- ReplacedLoads[ALoad] = NewVal;
- }
-
- // Allow the client to do stuff before we start nuking things.
- doExtraRewritesBeforeFinalDeletion();
-
- // Now that everything is rewritten, delete the old instructions from the
- // function. They should all be dead now.
- for (Instruction *User : Insts) {
- // If this is a load that still has uses, then the load must have been added
- // as a live value in the SSAUpdate data structure for a block (e.g. because
- // the loaded value was stored later). In this case, we need to recursively
- // propagate the updates until we get to the real value.
- if (!User->use_empty()) {
- Value *NewVal = ReplacedLoads[User];
- assert(NewVal && "not a replaced load?");
-
- // Propagate down to the ultimate replacee. The intermediately loads
- // could theoretically already have been deleted, so we don't want to
- // dereference the Value*'s.
- DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
- while (RLI != ReplacedLoads.end()) {
- NewVal = RLI->second;
- RLI = ReplacedLoads.find(NewVal);
- }
-
- replaceLoadWithValue(cast<LoadInst>(User), NewVal);
- User->replaceAllUsesWith(NewVal);
- }
-
- instructionDeleted(User);
- User->eraseFromParent();
- }
-}
-
-bool
-LoadAndStorePromoter::isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction *> &Insts)
- const {
- return is_contained(Insts, I);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
deleted file mode 100644
index 917d5e0a1ef0..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SSAUpdaterBulk class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/Value.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "ssaupdaterbulk"
-
-/// Helper function for finding a block which should have a value for the given
-/// user. For PHI-nodes this block is the corresponding predecessor, for other
-/// instructions it's their parent block.
-static BasicBlock *getUserBB(Use *U) {
- auto *User = cast<Instruction>(U->getUser());
-
- if (auto *UserPN = dyn_cast<PHINode>(User))
- return UserPN->getIncomingBlock(*U);
- else
- return User->getParent();
-}
-
-/// Add a new variable to the SSA rewriter. This needs to be called before
-/// AddAvailableValue or AddUse calls.
-unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) {
- unsigned Var = Rewrites.size();
- LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = "
- << *Ty << ", Name = " << Name << "\n");
- RewriteInfo RI(Name, Ty);
- Rewrites.push_back(RI);
- return Var;
-}
-
-/// Indicate that a rewritten value is available in the specified block with the
-/// specified value.
-void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) {
- assert(Var < Rewrites.size() && "Variable not found!");
- LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var
- << ": added new available value" << *V << " in "
- << BB->getName() << "\n");
- Rewrites[Var].Defines[BB] = V;
-}
-
-/// Record a use of the symbolic value. This use will be updated with a
-/// rewritten value when RewriteAllUses is called.
-void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) {
- assert(Var < Rewrites.size() && "Variable not found!");
- LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use" << *U->get()
- << " in " << getUserBB(U)->getName() << "\n");
- Rewrites[Var].Uses.push_back(U);
-}
-
-/// Return true if the SSAUpdater already has a value for the specified variable
-/// in the specified block.
-bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) {
- return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false;
-}
-
-// Compute value at the given block BB. We either should already know it, or we
-// should be able to recursively reach it going up dominator tree.
-Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
- DominatorTree *DT) {
- if (!R.Defines.count(BB)) {
- if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) {
- BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock();
- Value *V = computeValueAt(IDom, R, DT);
- R.Defines[BB] = V;
- } else
- R.Defines[BB] = UndefValue::get(R.Ty);
- }
- return R.Defines[BB];
-}
-
-/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
-/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
-static void
-ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks,
- const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
- SmallPtrSetImpl<BasicBlock *> &LiveInBlocks,
- PredIteratorCache &PredCache) {
- // To determine liveness, we must iterate through the predecessors of blocks
- // where the def is live. Blocks are added to the worklist if we need to
- // check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
- UsingBlocks.end());
-
- // Now that we have a set of blocks where the phi is live-in, recursively add
- // their predecessors until we find the full region the value is live.
- while (!LiveInBlockWorklist.empty()) {
- BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
- // The block really is live in here, insert it into the set. If already in
- // the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB).second)
- continue;
-
- // Since the value is live into BB, it is either defined in a predecessor or
- // live into it to. Add the preds to the worklist unless they are a
- // defining block.
- for (BasicBlock *P : PredCache.get(BB)) {
- // The value is not live into a predecessor if it defines the value.
- if (DefBlocks.count(P))
- continue;
-
- // Otherwise it is, add to the worklist.
- LiveInBlockWorklist.push_back(P);
- }
- }
-}
-
-/// Perform all the necessary updates, including new PHI-nodes insertion and the
-/// requested uses update.
-void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
- SmallVectorImpl<PHINode *> *InsertedPHIs) {
- for (auto &R : Rewrites) {
- // Compute locations for new phi-nodes.
- // For that we need to initialize DefBlocks from definitions in R.Defines,
- // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use
- // this set for computing iterated dominance frontier (IDF).
- // The IDF blocks are the blocks where we need to insert new phi-nodes.
- ForwardIDFCalculator IDF(*DT);
- LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size()
- << " use(s)\n");
-
- SmallPtrSet<BasicBlock *, 2> DefBlocks;
- for (auto &Def : R.Defines)
- DefBlocks.insert(Def.first);
- IDF.setDefiningBlocks(DefBlocks);
-
- SmallPtrSet<BasicBlock *, 2> UsingBlocks;
- for (Use *U : R.Uses)
- UsingBlocks.insert(getUserBB(U));
-
- SmallVector<BasicBlock *, 32> IDFBlocks;
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache);
- IDF.resetLiveInBlocks();
- IDF.setLiveInBlocks(LiveInBlocks);
- IDF.calculate(IDFBlocks);
-
- // We've computed IDF, now insert new phi-nodes there.
- SmallVector<PHINode *, 4> InsertedPHIsForVar;
- for (auto *FrontierBB : IDFBlocks) {
- IRBuilder<> B(FrontierBB, FrontierBB->begin());
- PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
- R.Defines[FrontierBB] = PN;
- InsertedPHIsForVar.push_back(PN);
- if (InsertedPHIs)
- InsertedPHIs->push_back(PN);
- }
-
- // Fill in arguments of the inserted PHIs.
- for (auto *PN : InsertedPHIsForVar) {
- BasicBlock *PBB = PN->getParent();
- for (BasicBlock *Pred : PredCache.get(PBB))
- PN->addIncoming(computeValueAt(Pred, R, DT), Pred);
- }
-
- // Rewrite actual uses with the inserted definitions.
- SmallPtrSet<Use *, 4> ProcessedUses;
- for (Use *U : R.Uses) {
- if (!ProcessedUses.insert(U).second)
- continue;
- Value *V = computeValueAt(getUserBB(U), R, DT);
- Value *OldVal = U->get();
- assert(OldVal && "Invalid use!");
- // Notify that users of the existing value that it is being replaced.
- if (OldVal != V && OldVal->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(OldVal, V);
- LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V
- << "\n");
- U->set(V);
- }
- }
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp
deleted file mode 100644
index a1313c77ed77..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements code generation for sanitizer statistics gathering.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SanitizerStats.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) {
- StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2);
- EmptyModuleStatsTy = makeModuleStatsTy();
-
- ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false,
- GlobalValue::InternalLinkage, nullptr);
-}
-
-ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() {
- return ArrayType::get(StatTy, Inits.size());
-}
-
-StructType *SanitizerStatReport::makeModuleStatsTy() {
- return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()),
- Type::getInt32Ty(M->getContext()),
- makeModuleStatsArrayTy()});
-}
-
-void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
- Function *F = B.GetInsertBlock()->getParent();
- Module *M = F->getParent();
- PointerType *Int8PtrTy = B.getInt8PtrTy();
- IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout());
- ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2);
-
- Inits.push_back(ConstantArray::get(
- StatTy,
- {Constant::getNullValue(Int8PtrTy),
- ConstantExpr::getIntToPtr(
- ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() -
- kSanitizerStatKindBits)),
- Int8PtrTy)}));
-
- FunctionType *StatReportTy =
- FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
- FunctionCallee StatReport =
- M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
-
- auto InitAddr = ConstantExpr::getGetElementPtr(
- EmptyModuleStatsTy, ModuleStatsGV,
- ArrayRef<Constant *>{
- ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2),
- ConstantInt::get(IntPtrTy, Inits.size() - 1),
- });
- B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy));
-}
-
-void SanitizerStatReport::finish() {
- if (Inits.empty()) {
- ModuleStatsGV->eraseFromParent();
- return;
- }
-
- PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
- IntegerType *Int32Ty = Type::getInt32Ty(M->getContext());
- Type *VoidTy = Type::getVoidTy(M->getContext());
-
- // Create a new ModuleStatsGV to replace the old one. We can't just set the
- // old one's initializer because its type is different.
- auto NewModuleStatsGV = new GlobalVariable(
- *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage,
- ConstantStruct::getAnon(
- {Constant::getNullValue(Int8PtrTy),
- ConstantInt::get(Int32Ty, Inits.size()),
- ConstantArray::get(makeModuleStatsArrayTy(), Inits)}));
- ModuleStatsGV->replaceAllUsesWith(
- ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType()));
- ModuleStatsGV->eraseFromParent();
-
- // Create a global constructor to register NewModuleStatsGV.
- auto F = Function::Create(FunctionType::get(VoidTy, false),
- GlobalValue::InternalLinkage, "", M);
- auto BB = BasicBlock::Create(M->getContext(), "", F);
- IRBuilder<> B(BB);
-
- FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- FunctionCallee StatInit =
- M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
-
- B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
- B.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
deleted file mode 100644
index 6e2ef67408d9..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ /dev/null
@@ -1,6081 +0,0 @@
-//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Peephole optimize the CFG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/NoFolder.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <climits>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <map>
-#include <set>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-using namespace PatternMatch;
-
-#define DEBUG_TYPE "simplifycfg"
-
-// Chosen as 2 so as to be cheap, but still to have enough power to fold
-// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
-// To catch this, we need to fold a compare and a select, hence '2' being the
-// minimum reasonable default.
-static cl::opt<unsigned> PHINodeFoldingThreshold(
- "phi-node-folding-threshold", cl::Hidden, cl::init(2),
- cl::desc(
- "Control the amount of phi node folding to perform (default = 2)"));
-
-static cl::opt<bool> DupRet(
- "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
- cl::desc("Duplicate return instructions into unconditional branches"));
-
-static cl::opt<bool>
- SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
- cl::desc("Sink common instructions down to the end block"));
-
-static cl::opt<bool> HoistCondStores(
- "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
- cl::desc("Hoist conditional stores if an unconditional store precedes"));
-
-static cl::opt<bool> MergeCondStores(
- "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
- cl::desc("Hoist conditional stores even if an unconditional store does not "
- "precede - hoist multiple conditional stores into a single "
- "predicated store"));
-
-static cl::opt<bool> MergeCondStoresAggressively(
- "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
- cl::desc("When merging conditional stores, do so even if the resultant "
- "basic blocks are unlikely to be if-converted as a result"));
-
-static cl::opt<bool> SpeculateOneExpensiveInst(
- "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
- cl::desc("Allow exactly one expensive instruction to be speculatively "
- "executed"));
-
-static cl::opt<unsigned> MaxSpeculationDepth(
- "max-speculation-depth", cl::Hidden, cl::init(10),
- cl::desc("Limit maximum recursion depth when calculating costs of "
- "speculatively executed instructions"));
-
-STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
-STATISTIC(NumLinearMaps,
- "Number of switch instructions turned into linear mapping");
-STATISTIC(NumLookupTables,
- "Number of switch instructions turned into lookup tables");
-STATISTIC(
- NumLookupTablesHoles,
- "Number of switch instructions turned into lookup tables (holes checked)");
-STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
-STATISTIC(NumSinkCommons,
- "Number of common instructions sunk down to the end block");
-STATISTIC(NumSpeculations, "Number of speculative executed instructions");
-
-namespace {
-
-// The first field contains the value that the switch produces when a certain
-// case group is selected, and the second field is a vector containing the
-// cases composing the case group.
-using SwitchCaseResultVectorTy =
- SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
-
-// The first field contains the phi node that generates a result of the switch
-// and the second field contains the value generated for a certain case in the
-// switch for that PHI.
-using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
-
-/// ValueEqualityComparisonCase - Represents a case of a switch.
-struct ValueEqualityComparisonCase {
- ConstantInt *Value;
- BasicBlock *Dest;
-
- ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
- : Value(Value), Dest(Dest) {}
-
- bool operator<(ValueEqualityComparisonCase RHS) const {
- // Comparing pointers is ok as we only rely on the order for uniquing.
- return Value < RHS.Value;
- }
-
- bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
-};
-
-class SimplifyCFGOpt {
- const TargetTransformInfo &TTI;
- const DataLayout &DL;
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
- const SimplifyCFGOptions &Options;
- bool Resimplify;
-
- Value *isValueEqualityComparison(Instruction *TI);
- BasicBlock *GetValueEqualityComparisonCases(
- Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
- bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
- BasicBlock *Pred,
- IRBuilder<> &Builder);
- bool FoldValueComparisonIntoPredecessors(Instruction *TI,
- IRBuilder<> &Builder);
-
- bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
- bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
- bool SimplifySingleResume(ResumeInst *RI);
- bool SimplifyCommonResume(ResumeInst *RI);
- bool SimplifyCleanupReturn(CleanupReturnInst *RI);
- bool SimplifyUnreachable(UnreachableInst *UI);
- bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
- bool SimplifyIndirectBr(IndirectBrInst *IBI);
- bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
- bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
-
- bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
- IRBuilder<> &Builder);
-
-public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
- const SimplifyCFGOptions &Opts)
- : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
-
- bool run(BasicBlock *BB);
- bool simplifyOnce(BasicBlock *BB);
-
- // Helper to set Resimplify and return change indication.
- bool requestResimplify() {
- Resimplify = true;
- return true;
- }
-};
-
-} // end anonymous namespace
-
-/// Return true if it is safe to merge these two
-/// terminator instructions together.
-static bool
-SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
- SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
- if (SI1 == SI2)
- return false; // Can't merge with self!
-
- // It is not safe to merge these two switch instructions if they have a common
- // successor, and if that successor has a PHI node, and if *that* PHI node has
- // conflicting incoming values from the two switch blocks.
- BasicBlock *SI1BB = SI1->getParent();
- BasicBlock *SI2BB = SI2->getParent();
-
- SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- bool Fail = false;
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) !=
- PN->getIncomingValueForBlock(SI2BB)) {
- if (FailBlocks)
- FailBlocks->insert(Succ);
- Fail = true;
- }
- }
-
- return !Fail;
-}
-
-/// Return true if it is safe and profitable to merge these two terminator
-/// instructions together, where SI1 is an unconditional branch. PhiNodes will
-/// store all PHI nodes in common successors.
-static bool
-isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
- Instruction *Cond,
- SmallVectorImpl<PHINode *> &PhiNodes) {
- if (SI1 == SI2)
- return false; // Can't merge with self!
- assert(SI1->isUnconditional() && SI2->isConditional());
-
- // We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
- // 1> We have a constant incoming value for the conditional branch;
- // 2> We have "Cond" as the incoming value for the unconditional branch;
- // 3> SI2->getCondition() and Cond have same operands.
- CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
- if (!Ci2)
- return false;
- if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
- Cond->getOperand(1) == Ci2->getOperand(1)) &&
- !(Cond->getOperand(0) == Ci2->getOperand(1) &&
- Cond->getOperand(1) == Ci2->getOperand(0)))
- return false;
-
- BasicBlock *SI1BB = SI1->getParent();
- BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
- !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
- return false;
- PhiNodes.push_back(PN);
- }
- return true;
-}
-
-/// Update PHI nodes in Succ to indicate that there will now be entries in it
-/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
-/// will be the same as those coming in from ExistPred, an existing predecessor
-/// of Succ.
-static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
- BasicBlock *ExistPred,
- MemorySSAUpdater *MSSAU = nullptr) {
- for (PHINode &PN : Succ->phis())
- PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
- if (MSSAU)
- if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
- MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
-}
-
-/// Compute an abstract "cost" of speculating the given instruction,
-/// which is assumed to be safe to speculate. TCC_Free means cheap,
-/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
-/// expensive.
-static unsigned ComputeSpeculationCost(const User *I,
- const TargetTransformInfo &TTI) {
- assert(isSafeToSpeculativelyExecute(I) &&
- "Instruction is not safe to speculatively execute!");
- return TTI.getUserCost(I);
-}
-
-/// If we have a merge point of an "if condition" as accepted above,
-/// return true if the specified value dominates the block. We
-/// don't handle the true generality of domination here, just a special case
-/// which works well enough for us.
-///
-/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
-/// see if V (which must be an instruction) and its recursive operands
-/// that do not dominate BB have a combined cost lower than CostRemaining and
-/// are non-trapping. If both are true, the instruction is inserted into the
-/// set and true is returned.
-///
-/// The cost for most non-trapping instructions is defined as 1 except for
-/// Select whose cost is 2.
-///
-/// After this function returns, CostRemaining is decreased by the cost of
-/// V plus its non-dominating operands. If that cost is greater than
-/// CostRemaining, false is returned and CostRemaining is undefined.
-static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSetImpl<Instruction *> &AggressiveInsts,
- unsigned &CostRemaining,
- const TargetTransformInfo &TTI,
- unsigned Depth = 0) {
- // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
- // so limit the recursion depth.
- // TODO: While this recursion limit does prevent pathological behavior, it
- // would be better to track visited instructions to avoid cycles.
- if (Depth == MaxSpeculationDepth)
- return false;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) {
- // Non-instructions all dominate instructions, but not all constantexprs
- // can be executed unconditionally.
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
- if (C->canTrap())
- return false;
- return true;
- }
- BasicBlock *PBB = I->getParent();
-
- // We don't want to allow weird loops that might have the "if condition" in
- // the bottom of this block.
- if (PBB == BB)
- return false;
-
- // If this instruction is defined in a block that contains an unconditional
- // branch to BB, then it must be in the 'conditional' part of the "if
- // statement". If not, it definitely dominates the region.
- BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
- if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
- return true;
-
- // If we have seen this instruction before, don't count it again.
- if (AggressiveInsts.count(I))
- return true;
-
- // Okay, it looks like the instruction IS in the "condition". Check to
- // see if it's a cheap instruction to unconditionally compute, and if it
- // only uses stuff defined outside of the condition. If so, hoist it out.
- if (!isSafeToSpeculativelyExecute(I))
- return false;
-
- unsigned Cost = ComputeSpeculationCost(I, TTI);
-
- // Allow exactly one instruction to be speculated regardless of its cost
- // (as long as it is safe to do so).
- // This is intended to flatten the CFG even if the instruction is a division
- // or other expensive operation. The speculation of an expensive instruction
- // is expected to be undone in CodeGenPrepare if the speculation has not
- // enabled further IR optimizations.
- if (Cost > CostRemaining &&
- (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
- return false;
-
- // Avoid unsigned wrap.
- CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
-
- // Okay, we can only really hoist these out if their operands do
- // not take us over the cost threshold.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
- Depth + 1))
- return false;
- // Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts.insert(I);
- return true;
-}
-
-/// Extract ConstantInt from value, looking through IntToPtr
-/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
- // Normal constant int.
- ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
- return CI;
-
- // This is some kind of pointer constant. Turn it into a pointer-sized
- // ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
-
- // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
- if (isa<ConstantPointerNull>(V))
- return ConstantInt::get(PtrTy, 0);
-
- // IntToPtr const int.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::IntToPtr)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
- // The constant is very likely to have the right type already.
- if (CI->getType() == PtrTy)
- return CI;
- else
- return cast<ConstantInt>(
- ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
- }
- return nullptr;
-}
-
-namespace {
-
-/// Given a chain of or (||) or and (&&) comparison of a value against a
-/// constant, this will try to recover the information required for a switch
-/// structure.
-/// It will depth-first traverse the chain of comparison, seeking for patterns
-/// like %a == 12 or %a < 4 and combine them to produce a set of integer
-/// representing the different cases for the switch.
-/// Note that if the chain is composed of '||' it will build the set of elements
-/// that matches the comparisons (i.e. any of this value validate the chain)
-/// while for a chain of '&&' it will build the set elements that make the test
-/// fail.
-struct ConstantComparesGatherer {
- const DataLayout &DL;
-
- /// Value found for the switch comparison
- Value *CompValue = nullptr;
-
- /// Extra clause to be checked before the switch
- Value *Extra = nullptr;
-
- /// Set of integers to match in switch
- SmallVector<ConstantInt *, 8> Vals;
-
- /// Number of comparisons matched in the and/or chain
- unsigned UsedICmps = 0;
-
- /// Construct and compute the result for the comparison instruction Cond
- ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
- gather(Cond);
- }
-
- ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
- ConstantComparesGatherer &
- operator=(const ConstantComparesGatherer &) = delete;
-
-private:
- /// Try to set the current value used for the comparison, it succeeds only if
- /// it wasn't set before or if the new value is the same as the old one
- bool setValueOnce(Value *NewVal) {
- if (CompValue && CompValue != NewVal)
- return false;
- CompValue = NewVal;
- return (CompValue != nullptr);
- }
-
- /// Try to match Instruction "I" as a comparison against a constant and
- /// populates the array Vals with the set of values that match (or do not
- /// match depending on isEQ).
- /// Return false on failure. On success, the Value the comparison matched
- /// against is placed in CompValue.
- /// If CompValue is already set, the function is expected to fail if a match
- /// is found but the value compared to is different.
- bool matchInstruction(Instruction *I, bool isEQ) {
- // If this is an icmp against a constant, handle this as one of the cases.
- ICmpInst *ICI;
- ConstantInt *C;
- if (!((ICI = dyn_cast<ICmpInst>(I)) &&
- (C = GetConstantInt(I->getOperand(1), DL)))) {
- return false;
- }
-
- Value *RHSVal;
- const APInt *RHSC;
-
- // Pattern match a special case
- // (x & ~2^z) == y --> x == y || x == y|2^z
- // This undoes a transformation done by instcombine to fuse 2 compares.
- if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
- // It's a little bit hard to see why the following transformations are
- // correct. Here is a CVC3 program to verify them for 64-bit values:
-
- /*
- ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
- x : BITVECTOR(64);
- y : BITVECTOR(64);
- z : BITVECTOR(64);
- mask : BITVECTOR(64) = BVSHL(ONE, z);
- QUERY( (y & ~mask = y) =>
- ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
- );
- QUERY( (y | mask = y) =>
- ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
- );
- */
-
- // Please note that each pattern must be a dual implication (<--> or
- // iff). One directional implication can create spurious matches. If the
- // implication is only one-way, an unsatisfiable condition on the left
- // side can imply a satisfiable condition on the right side. Dual
- // implication ensures that satisfiable conditions are transformed to
- // other satisfiable conditions and unsatisfiable conditions are
- // transformed to other unsatisfiable conditions.
-
- // Here is a concrete example of a unsatisfiable condition on the left
- // implying a satisfiable condition on the right:
- //
- // mask = (1 << z)
- // (x & ~mask) == y --> (x == y || x == (y | mask))
- //
- // Substituting y = 3, z = 0 yields:
- // (x & -2) == 3 --> (x == 3 || x == 2)
-
- // Pattern match a special case:
- /*
- QUERY( (y & ~mask = y) =>
- ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
- );
- */
- if (match(ICI->getOperand(0),
- m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
- APInt Mask = ~*RHSC;
- if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
- // If we already have a value for the switch, it has to match!
- if (!setValueOnce(RHSVal))
- return false;
-
- Vals.push_back(C);
- Vals.push_back(
- ConstantInt::get(C->getContext(),
- C->getValue() | Mask));
- UsedICmps++;
- return true;
- }
- }
-
- // Pattern match a special case:
- /*
- QUERY( (y | mask = y) =>
- ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
- );
- */
- if (match(ICI->getOperand(0),
- m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
- APInt Mask = *RHSC;
- if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
- // If we already have a value for the switch, it has to match!
- if (!setValueOnce(RHSVal))
- return false;
-
- Vals.push_back(C);
- Vals.push_back(ConstantInt::get(C->getContext(),
- C->getValue() & ~Mask));
- UsedICmps++;
- return true;
- }
- }
-
- // If we already have a value for the switch, it has to match!
- if (!setValueOnce(ICI->getOperand(0)))
- return false;
-
- UsedICmps++;
- Vals.push_back(C);
- return ICI->getOperand(0);
- }
-
- // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
- ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
- ICI->getPredicate(), C->getValue());
-
- // Shift the range if the compare is fed by an add. This is the range
- // compare idiom as emitted by instcombine.
- Value *CandidateVal = I->getOperand(0);
- if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
- Span = Span.subtract(*RHSC);
- CandidateVal = RHSVal;
- }
-
- // If this is an and/!= check, then we are looking to build the set of
- // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
- // x != 0 && x != 1.
- if (!isEQ)
- Span = Span.inverse();
-
- // If there are a ton of values, we don't want to make a ginormous switch.
- if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
- return false;
- }
-
- // If we already have a value for the switch, it has to match!
- if (!setValueOnce(CandidateVal))
- return false;
-
- // Add all values from the range to the set
- for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
- Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
-
- UsedICmps++;
- return true;
- }
-
- /// Given a potentially 'or'd or 'and'd together collection of icmp
- /// eq/ne/lt/gt instructions that compare a value against a constant, extract
- /// the value being compared, and stick the list constants into the Vals
- /// vector.
- /// One "Extra" case is allowed to differ from the other.
- void gather(Value *V) {
- Instruction *I = dyn_cast<Instruction>(V);
- bool isEQ = (I->getOpcode() == Instruction::Or);
-
- // Keep a stack (SmallVector for efficiency) for depth-first traversal
- SmallVector<Value *, 8> DFT;
- SmallPtrSet<Value *, 8> Visited;
-
- // Initialize
- Visited.insert(V);
- DFT.push_back(V);
-
- while (!DFT.empty()) {
- V = DFT.pop_back_val();
-
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // If it is a || (or && depending on isEQ), process the operands.
- if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
- if (Visited.insert(I->getOperand(1)).second)
- DFT.push_back(I->getOperand(1));
- if (Visited.insert(I->getOperand(0)).second)
- DFT.push_back(I->getOperand(0));
- continue;
- }
-
- // Try to match the current instruction
- if (matchInstruction(I, isEQ))
- // Match succeed, continue the loop
- continue;
- }
-
- // One element of the sequence of || (or &&) could not be match as a
- // comparison against the same value as the others.
- // We allow only one "Extra" case to be checked before the switch
- if (!Extra) {
- Extra = V;
- continue;
- }
- // Failed to parse a proper sequence, abort now
- CompValue = nullptr;
- break;
- }
- }
-};
-
-} // end anonymous namespace
-
-static void EraseTerminatorAndDCECond(Instruction *TI,
- MemorySSAUpdater *MSSAU = nullptr) {
- Instruction *Cond = nullptr;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Cond = dyn_cast<Instruction>(SI->getCondition());
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional())
- Cond = dyn_cast<Instruction>(BI->getCondition());
- } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
- Cond = dyn_cast<Instruction>(IBI->getAddress());
- }
-
- TI->eraseFromParent();
- if (Cond)
- RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
-}
-
-/// Return true if the specified terminator checks
-/// to see if a value is equal to constant integer value.
-Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
- Value *CV = nullptr;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- // Do not permit merging of large switch instructions into their
- // predecessors unless there is only one predecessor.
- if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
- CV = SI->getCondition();
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
- if (BI->isConditional() && BI->getCondition()->hasOneUse())
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
- CV = ICI->getOperand(0);
- }
-
- // Unwrap any lossless ptrtoint cast.
- if (CV) {
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
- Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
- CV = Ptr;
- }
- }
- return CV;
-}
-
-/// Given a value comparison instruction,
-/// decode all of the 'cases' that it represents and return the 'default' block.
-BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
- Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
- if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Cases.reserve(SI->getNumCases());
- for (auto Case : SI->cases())
- Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
- Case.getCaseSuccessor()));
- return SI->getDefaultDest();
- }
-
- BranchInst *BI = cast<BranchInst>(TI);
- ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
- BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
- Cases.push_back(ValueEqualityComparisonCase(
- GetConstantInt(ICI->getOperand(1), DL), Succ));
- return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
-}
-
-/// Given a vector of bb/value pairs, remove any entries
-/// in the list that match the specified block.
-static void
-EliminateBlockCases(BasicBlock *BB,
- std::vector<ValueEqualityComparisonCase> &Cases) {
- Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
-}
-
-/// Return true if there are any keys in C1 that exist in C2 as well.
-static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
- std::vector<ValueEqualityComparisonCase> &C2) {
- std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
-
- // Make V1 be smaller than V2.
- if (V1->size() > V2->size())
- std::swap(V1, V2);
-
- if (V1->empty())
- return false;
- if (V1->size() == 1) {
- // Just scan V2.
- ConstantInt *TheVal = (*V1)[0].Value;
- for (unsigned i = 0, e = V2->size(); i != e; ++i)
- if (TheVal == (*V2)[i].Value)
- return true;
- }
-
- // Otherwise, just sort both lists and compare element by element.
- array_pod_sort(V1->begin(), V1->end());
- array_pod_sort(V2->begin(), V2->end());
- unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
- while (i1 != e1 && i2 != e2) {
- if ((*V1)[i1].Value == (*V2)[i2].Value)
- return true;
- if ((*V1)[i1].Value < (*V2)[i2].Value)
- ++i1;
- else
- ++i2;
- }
- return false;
-}
-
-// Set branch weights on SwitchInst. This sets the metadata if there is at
-// least one non-zero weight.
-static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
- // Check that there is at least one non-zero weight. Otherwise, pass
- // nullptr to setMetadata which will erase the existing metadata.
- MDNode *N = nullptr;
- if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
- N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
- SI->setMetadata(LLVMContext::MD_prof, N);
-}
-
-// Similar to the above, but for branch and select instructions that take
-// exactly 2 weights.
-static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
- uint32_t FalseWeight) {
- assert(isa<BranchInst>(I) || isa<SelectInst>(I));
- // Check that there is at least one non-zero weight. Otherwise, pass
- // nullptr to setMetadata which will erase the existing metadata.
- MDNode *N = nullptr;
- if (TrueWeight || FalseWeight)
- N = MDBuilder(I->getParent()->getContext())
- .createBranchWeights(TrueWeight, FalseWeight);
- I->setMetadata(LLVMContext::MD_prof, N);
-}
-
-/// If TI is known to be a terminator instruction and its block is known to
-/// only have a single predecessor block, check to see if that predecessor is
-/// also a value comparison with the same value, and if that comparison
-/// determines the outcome of this comparison. If so, simplify TI. This does a
-/// very limited form of jump threading.
-bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
- Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
- Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
- if (!PredVal)
- return false; // Not a value comparison in predecessor.
-
- Value *ThisVal = isValueEqualityComparison(TI);
- assert(ThisVal && "This isn't a value comparison!!");
- if (ThisVal != PredVal)
- return false; // Different predicates.
-
- // TODO: Preserve branch weight metadata, similarly to how
- // FoldValueComparisonIntoPredecessors preserves it.
-
- // Find out information about when control will move from Pred to TI's block.
- std::vector<ValueEqualityComparisonCase> PredCases;
- BasicBlock *PredDef =
- GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
- EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
-
- // Find information about how control leaves this block.
- std::vector<ValueEqualityComparisonCase> ThisCases;
- BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
- EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
-
- // If TI's block is the default block from Pred's comparison, potentially
- // simplify TI based on this knowledge.
- if (PredDef == TI->getParent()) {
- // If we are here, we know that the value is none of those cases listed in
- // PredCases. If there are any cases in ThisCases that are in PredCases, we
- // can simplify TI.
- if (!ValuesOverlap(PredCases, ThisCases))
- return false;
-
- if (isa<BranchInst>(TI)) {
- // Okay, one of the successors of this condbr is dead. Convert it to a
- // uncond br.
- assert(ThisCases.size() == 1 && "Branch can only have one case!");
- // Insert the new branch.
- Instruction *NI = Builder.CreateBr(ThisDef);
- (void)NI;
-
- // Remove PHI node entries for the dead edge.
- ThisCases[0].Dest->removePredecessor(TI->getParent());
-
- LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI
- << "\n");
-
- EraseTerminatorAndDCECond(TI);
- return true;
- }
-
- SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
- // Okay, TI has cases that are statically dead, prune them away.
- SmallPtrSet<Constant *, 16> DeadCases;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- DeadCases.insert(PredCases[i].Value);
-
- LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI);
-
- for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
- --i;
- if (DeadCases.count(i->getCaseValue())) {
- i->getCaseSuccessor()->removePredecessor(TI->getParent());
- SI.removeCase(i);
- }
- }
- LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
- return true;
- }
-
- // Otherwise, TI's block must correspond to some matched value. Find out
- // which value (or set of values) this is.
- ConstantInt *TIV = nullptr;
- BasicBlock *TIBB = TI->getParent();
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest == TIBB) {
- if (TIV)
- return false; // Cannot handle multiple values coming to this block.
- TIV = PredCases[i].Value;
- }
- assert(TIV && "No edge from pred to succ?");
-
- // Okay, we found the one constant that our value can be if we get into TI's
- // BB. Find out which successor will unconditionally be branched to.
- BasicBlock *TheRealDest = nullptr;
- for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
- if (ThisCases[i].Value == TIV) {
- TheRealDest = ThisCases[i].Dest;
- break;
- }
-
- // If not handled by any explicit cases, it is handled by the default case.
- if (!TheRealDest)
- TheRealDest = ThisDef;
-
- // Remove PHI node entries for dead edges.
- BasicBlock *CheckEdge = TheRealDest;
- for (BasicBlock *Succ : successors(TIBB))
- if (Succ != CheckEdge)
- Succ->removePredecessor(TIBB);
- else
- CheckEdge = nullptr;
-
- // Insert the new branch.
- Instruction *NI = Builder.CreateBr(TheRealDest);
- (void)NI;
-
- LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI
- << "\n");
-
- EraseTerminatorAndDCECond(TI);
- return true;
-}
-
-namespace {
-
-/// This class implements a stable ordering of constant
-/// integers that does not depend on their address. This is important for
-/// applications that sort ConstantInt's to ensure uniqueness.
-struct ConstantIntOrdering {
- bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
- return LHS->getValue().ult(RHS->getValue());
- }
-};
-
-} // end anonymous namespace
-
-static int ConstantIntSortPredicate(ConstantInt *const *P1,
- ConstantInt *const *P2) {
- const ConstantInt *LHS = *P1;
- const ConstantInt *RHS = *P2;
- if (LHS == RHS)
- return 0;
- return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
-}
-
-static inline bool HasBranchWeights(const Instruction *I) {
- MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
- if (ProfMD && ProfMD->getOperand(0))
- if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
- return MDS->getString().equals("branch_weights");
-
- return false;
-}
-
-/// Get Weights of a given terminator, the default weight is at the front
-/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
-/// metadata.
-static void GetBranchWeights(Instruction *TI,
- SmallVectorImpl<uint64_t> &Weights) {
- MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
- assert(MD);
- for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
- ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
- Weights.push_back(CI->getValue().getZExtValue());
- }
-
- // If TI is a conditional eq, the default case is the false case,
- // and the corresponding branch-weight data is at index 2. We swap the
- // default weight to be the first entry.
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- assert(Weights.size() == 2);
- ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- std::swap(Weights.front(), Weights.back());
- }
-}
-
-/// Keep halving the weights until all can fit in uint32_t.
-static void FitWeights(MutableArrayRef<uint64_t> Weights) {
- uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
- if (Max > UINT_MAX) {
- unsigned Offset = 32 - countLeadingZeros(Max);
- for (uint64_t &I : Weights)
- I >>= Offset;
- }
-}
-
-/// The specified terminator is a value equality comparison instruction
-/// (either a switch or a branch on "X == c").
-/// See if any of the predecessors of the terminator block are value comparisons
-/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
- IRBuilder<> &Builder) {
- BasicBlock *BB = TI->getParent();
- Value *CV = isValueEqualityComparison(TI); // CondVal
- assert(CV && "Not a comparison?");
- bool Changed = false;
-
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.pop_back_val();
-
- // See if the predecessor is a comparison with the same value.
- Instruction *PTI = Pred->getTerminator();
- Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
-
- if (PCV == CV && TI != PTI) {
- SmallSetVector<BasicBlock*, 4> FailBlocks;
- if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
- for (auto *Succ : FailBlocks) {
- if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
- return false;
- }
- }
-
- // Figure out which 'cases' to copy from SI to PSI.
- std::vector<ValueEqualityComparisonCase> BBCases;
- BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
-
- std::vector<ValueEqualityComparisonCase> PredCases;
- BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
-
- // Based on whether the default edge from PTI goes to BB or not, fill in
- // PredCases and PredDefault with the new switch cases we would like to
- // build.
- SmallVector<BasicBlock *, 8> NewSuccessors;
-
- // Update the branch weight metadata along the way
- SmallVector<uint64_t, 8> Weights;
- bool PredHasWeights = HasBranchWeights(PTI);
- bool SuccHasWeights = HasBranchWeights(TI);
-
- if (PredHasWeights) {
- GetBranchWeights(PTI, Weights);
- // branch-weight metadata is inconsistent here.
- if (Weights.size() != 1 + PredCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (SuccHasWeights)
- // If there are no predecessor weights but there are successor weights,
- // populate Weights with 1, which will later be scaled to the sum of
- // successor's weights
- Weights.assign(1 + PredCases.size(), 1);
-
- SmallVector<uint64_t, 8> SuccWeights;
- if (SuccHasWeights) {
- GetBranchWeights(TI, SuccWeights);
- // branch-weight metadata is inconsistent here.
- if (SuccWeights.size() != 1 + BBCases.size())
- PredHasWeights = SuccHasWeights = false;
- } else if (PredHasWeights)
- SuccWeights.assign(1 + BBCases.size(), 1);
-
- if (PredDefault == BB) {
- // If this is the default destination from PTI, only the edges in TI
- // that don't occur in PTI, or that branch to BB will be activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest != BB)
- PTIHandled.insert(PredCases[i].Value);
- else {
- // The default destination is BB, we don't need explicit targets.
- std::swap(PredCases[i], PredCases.back());
-
- if (PredHasWeights || SuccHasWeights) {
- // Increase weight for the default case.
- Weights[0] += Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
-
- PredCases.pop_back();
- --i;
- --e;
- }
-
- // Reconstruct the new switch statement we will be building.
- if (PredDefault != BBDefault) {
- PredDefault->removePredecessor(Pred);
- PredDefault = BBDefault;
- NewSuccessors.push_back(BBDefault);
- }
-
- unsigned CasesFromPred = Weights.size();
- uint64_t ValidTotalSuccWeight = 0;
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (!PTIHandled.count(BBCases[i].Value) &&
- BBCases[i].Dest != BBDefault) {
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- if (SuccHasWeights || PredHasWeights) {
- // The default weight is at index 0, so weight for the ith case
- // should be at index i+1. Scale the cases from successor by
- // PredDefaultWeight (Weights[0]).
- Weights.push_back(Weights[0] * SuccWeights[i + 1]);
- ValidTotalSuccWeight += SuccWeights[i + 1];
- }
- }
-
- if (SuccHasWeights || PredHasWeights) {
- ValidTotalSuccWeight += SuccWeights[0];
- // Scale the cases from predecessor by ValidTotalSuccWeight.
- for (unsigned i = 1; i < CasesFromPred; ++i)
- Weights[i] *= ValidTotalSuccWeight;
- // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
- Weights[0] *= SuccWeights[0];
- }
- } else {
- // If this is not the default destination from PSI, only the edges
- // in SI that occur in PSI with a destination of BB will be
- // activated.
- std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
- std::map<ConstantInt *, uint64_t> WeightsForHandled;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].Dest == BB) {
- PTIHandled.insert(PredCases[i].Value);
-
- if (PredHasWeights || SuccHasWeights) {
- WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
- std::swap(Weights[i + 1], Weights.back());
- Weights.pop_back();
- }
-
- std::swap(PredCases[i], PredCases.back());
- PredCases.pop_back();
- --i;
- --e;
- }
-
- // Okay, now we know which constants were sent to BB from the
- // predecessor. Figure out where they will all go now.
- for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
- if (PTIHandled.count(BBCases[i].Value)) {
- // If this is one we are capable of getting...
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[BBCases[i].Value]);
- PredCases.push_back(BBCases[i]);
- NewSuccessors.push_back(BBCases[i].Dest);
- PTIHandled.erase(
- BBCases[i].Value); // This constant is taken care of
- }
-
- // If there are any constants vectored to BB that TI doesn't handle,
- // they must go to the default destination of TI.
- for (ConstantInt *I : PTIHandled) {
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[I]);
- PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
- NewSuccessors.push_back(BBDefault);
- }
- }
-
- // Okay, at this point, we know which new successor Pred will get. Make
- // sure we update the number of entries in the PHI nodes for these
- // successors.
- for (BasicBlock *NewSuccessor : NewSuccessors)
- AddPredecessorToBlock(NewSuccessor, Pred, BB);
-
- Builder.SetInsertPoint(PTI);
- // Convert pointer to int before we switch.
- if (CV->getType()->isPointerTy()) {
- CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
- "magicptr");
- }
-
- // Now that the successors are updated, create the new Switch instruction.
- SwitchInst *NewSI =
- Builder.CreateSwitch(CV, PredDefault, PredCases.size());
- NewSI->setDebugLoc(PTI->getDebugLoc());
- for (ValueEqualityComparisonCase &V : PredCases)
- NewSI->addCase(V.Value, V.Dest);
-
- if (PredHasWeights || SuccHasWeights) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(Weights);
-
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
-
- setBranchWeights(NewSI, MDWeights);
- }
-
- EraseTerminatorAndDCECond(PTI);
-
- // Okay, last check. If BB is still a successor of PSI, then we must
- // have an infinite loop case. If so, add an infinitely looping block
- // to handle the case to preserve the behavior of the code.
- BasicBlock *InfLoopBlock = nullptr;
- for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
- if (NewSI->getSuccessor(i) == BB) {
- if (!InfLoopBlock) {
- // Insert it at the end of the function, because it's either code,
- // or it won't matter if it's hot. :)
- InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
- BB->getParent());
- BranchInst::Create(InfLoopBlock, InfLoopBlock);
- }
- NewSI->setSuccessor(i, InfLoopBlock);
- }
-
- Changed = true;
- }
- }
- return Changed;
-}
-
-// If we would need to insert a select that uses the value of this invoke
-// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
-// can't hoist the invoke, as there is nowhere to put the select in this case.
-static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
- Instruction *I1, Instruction *I2) {
- for (BasicBlock *Succ : successors(BB1)) {
- for (const PHINode &PN : Succ->phis()) {
- Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
- return false;
- }
- }
- }
- return true;
-}
-
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
-
-/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
-/// in the two blocks up into the branch block. The caller of this function
-/// guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI,
- const TargetTransformInfo &TTI) {
- // This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. In particular, we don't want to get into
- // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
- // such, we currently just scan for obviously identical instructions in an
- // identical order.
- BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
- BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
-
- BasicBlock::iterator BB1_Itr = BB1->begin();
- BasicBlock::iterator BB2_Itr = BB2->begin();
-
- Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
- }
- // FIXME: Can we define a safety predicate for CallBr?
- if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
- (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
- isa<CallBrInst>(I1))
- return false;
-
- BasicBlock *BIParent = BI->getParent();
-
- bool Changed = false;
- do {
- // If we are hoisting the terminator instruction, don't move one (making a
- // broken BB), instead clone it, and remove BI.
- if (I1->isTerminator())
- goto HoistTerminator;
-
- // If we're going to hoist a call, make sure that the two instructions we're
- // commoning/hoisting are both marked with musttail, or neither of them is
- // marked as such. Otherwise, we might end up in a situation where we hoist
- // from a block where the terminator is a `ret` to a block where the terminator
- // is a `br`, and `musttail` calls expect to be followed by a return.
- auto *C1 = dyn_cast<CallInst>(I1);
- auto *C2 = dyn_cast<CallInst>(I2);
- if (C1 && C2)
- if (C1->isMustTailCall() != C2->isMustTailCall())
- return Changed;
-
- if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
- return Changed;
-
- if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
- assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
- // The debug location is an integral part of a debug info intrinsic
- // and can't be separated from it or replaced. Instead of attempting
- // to merge locations, simply hoist both copies of the intrinsic.
- BIParent->getInstList().splice(BI->getIterator(),
- BB1->getInstList(), I1);
- BIParent->getInstList().splice(BI->getIterator(),
- BB2->getInstList(), I2);
- Changed = true;
- } else {
- // For a normal instruction, we just move one to right before the branch,
- // then replace all uses of the other with the first. Finally, we remove
- // the now redundant second instruction.
- BIParent->getInstList().splice(BI->getIterator(),
- BB1->getInstList(), I1);
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->andIRFlags(I2);
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access,
- LLVMContext::MD_access_group};
- combineMetadata(I1, I2, KnownIDs, true);
-
- // I1 and I2 are being combined into a single instruction. Its debug
- // location is the merged locations of the original instructions.
- I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
- I2->eraseFromParent();
- Changed = true;
- }
-
- I1 = &*BB1_Itr++;
- I2 = &*BB2_Itr++;
- // Skip debug info if it is not identical.
- DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
- DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
- if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
- while (isa<DbgInfoIntrinsic>(I1))
- I1 = &*BB1_Itr++;
- while (isa<DbgInfoIntrinsic>(I2))
- I2 = &*BB2_Itr++;
- }
- } while (I1->isIdenticalToWhenDefined(I2));
-
- return true;
-
-HoistTerminator:
- // It may not be possible to hoist an invoke.
- // FIXME: Can we define a safety predicate for CallBr?
- if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return Changed;
-
- // TODO: callbr hoisting currently disabled pending further study.
- if (isa<CallBrInst>(I1))
- return Changed;
-
- for (BasicBlock *Succ : successors(BB1)) {
- for (PHINode &PN : Succ->phis()) {
- Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
-
- // Check for passingValueIsAlwaysUndefined here because we would rather
- // eliminate undefined control flow then converting it to a select.
- if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
- passingValueIsAlwaysUndefined(BB2V, &PN))
- return Changed;
-
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
- return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
- return Changed;
- }
- }
-
- // Okay, it is safe to hoist the terminator.
- Instruction *NT = I1->clone();
- BIParent->getInstList().insert(BI->getIterator(), NT);
- if (!NT->getType()->isVoidTy()) {
- I1->replaceAllUsesWith(NT);
- I2->replaceAllUsesWith(NT);
- NT->takeName(I1);
- }
-
- // Ensure terminator gets a debug location, even an unknown one, in case
- // it involves inlinable calls.
- NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
- // PHIs created below will adopt NT's merged DebugLoc.
- IRBuilder<NoFolder> Builder(NT);
-
- // Hoisting one of the terminators from our successor is a great thing.
- // Unfortunately, the successors of the if/else blocks may have PHI nodes in
- // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
- // nodes, so we insert select instruction to compute the final result.
- std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
- for (BasicBlock *Succ : successors(BB1)) {
- for (PHINode &PN : Succ->phis()) {
- Value *BB1V = PN.getIncomingValueForBlock(BB1);
- Value *BB2V = PN.getIncomingValueForBlock(BB2);
- if (BB1V == BB2V)
- continue;
-
- // These values do not agree. Insert a select instruction before NT
- // that determines the right value.
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (!SI)
- SI = cast<SelectInst>(
- Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
- BB1V->getName() + "." + BB2V->getName(), BI));
-
- // Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
- PN.setIncomingValue(i, SI);
- }
- }
-
- // Update any PHI nodes in our new successors.
- for (BasicBlock *Succ : successors(BB1))
- AddPredecessorToBlock(Succ, BIParent, BB1);
-
- EraseTerminatorAndDCECond(BI);
- return true;
-}
-
-// All instructions in Insts belong to different blocks that all unconditionally
-// branch to a common successor. Analyze each instruction and return true if it
-// would be possible to sink them into their successor, creating one common
-// instruction instead. For every value that would be required to be provided by
-// PHI node (because an operand varies in each input block), add to PHIOperands.
-static bool canSinkInstructions(
- ArrayRef<Instruction *> Insts,
- DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
- // Prune out obviously bad instructions to move. Any non-store instruction
- // must have exactly one use, and we check later that use is by a single,
- // common PHI instruction in the successor.
- for (auto *I : Insts) {
- // These instructions may change or break semantics if moved.
- if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
- I->getType()->isTokenTy())
- return false;
-
- // Conservatively return false if I is an inline-asm instruction. Sinking
- // and merging inline-asm instructions can potentially create arguments
- // that cannot satisfy the inline-asm constraints.
- if (const auto *C = dyn_cast<CallBase>(I))
- if (C->isInlineAsm())
- return false;
-
- // Everything must have only one use too, apart from stores which
- // have no uses.
- if (!isa<StoreInst>(I) && !I->hasOneUse())
- return false;
- }
-
- const Instruction *I0 = Insts.front();
- for (auto *I : Insts)
- if (!I->isSameOperationAs(I0))
- return false;
-
- // All instructions in Insts are known to be the same opcode. If they aren't
- // stores, check the only user of each is a PHI or in the same block as the
- // instruction, because if a user is in the same block as an instruction
- // we're contemplating sinking, it must already be determined to be sinkable.
- if (!isa<StoreInst>(I0)) {
- auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
- auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
- if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
- auto *U = cast<Instruction>(*I->user_begin());
- return (PNUse &&
- PNUse->getParent() == Succ &&
- PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
- U->getParent() == I->getParent();
- }))
- return false;
- }
-
- // Because SROA can't handle speculating stores of selects, try not
- // to sink loads or stores of allocas when we'd have to create a PHI for
- // the address operand. Also, because it is likely that loads or stores
- // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
- // This can cause code churn which can have unintended consequences down
- // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
- // FIXME: This is a workaround for a deficiency in SROA - see
- // https://llvm.org/bugs/show_bug.cgi?id=30188
- if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(1));
- }))
- return false;
- if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(0));
- }))
- return false;
-
- for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
- if (I0->getOperand(OI)->getType()->isTokenTy())
- // Don't touch any operand of token type.
- return false;
-
- auto SameAsI0 = [&I0, OI](const Instruction *I) {
- assert(I->getNumOperands() == I0->getNumOperands());
- return I->getOperand(OI) == I0->getOperand(OI);
- };
- if (!all_of(Insts, SameAsI0)) {
- if (!canReplaceOperandWithVariable(I0, OI))
- // We can't create a PHI from this GEP.
- return false;
- // Don't create indirect calls! The called value is the final operand.
- if (isa<CallBase>(I0) && OI == OE - 1) {
- // FIXME: if the call was *already* indirect, we should do this.
- return false;
- }
- for (auto *I : Insts)
- PHIOperands[I].push_back(I->getOperand(OI));
- }
- }
- return true;
-}
-
-// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
-// instruction of every block in Blocks to their common successor, commoning
-// into one instruction.
-static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
- auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
-
- // canSinkLastInstruction returning true guarantees that every block has at
- // least one non-terminator instruction.
- SmallVector<Instruction*,4> Insts;
- for (auto *BB : Blocks) {
- Instruction *I = BB->getTerminator();
- do {
- I = I->getPrevNode();
- } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
- if (!isa<DbgInfoIntrinsic>(I))
- Insts.push_back(I);
- }
-
- // The only checking we need to do now is that all users of all instructions
- // are the same PHI node. canSinkLastInstruction should have checked this but
- // it is slightly over-aggressive - it gets confused by commutative instructions
- // so double-check it here.
- Instruction *I0 = Insts.front();
- if (!isa<StoreInst>(I0)) {
- auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
- if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
- auto *U = cast<Instruction>(*I->user_begin());
- return U == PNUse;
- }))
- return false;
- }
-
- // We don't need to do any more checking here; canSinkLastInstruction should
- // have done it all for us.
- SmallVector<Value*, 4> NewOperands;
- for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
- // This check is different to that in canSinkLastInstruction. There, we
- // cared about the global view once simplifycfg (and instcombine) have
- // completed - it takes into account PHIs that become trivially
- // simplifiable. However here we need a more local view; if an operand
- // differs we create a PHI and rely on instcombine to clean up the very
- // small mess we may make.
- bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
- return I->getOperand(O) != I0->getOperand(O);
- });
- if (!NeedPHI) {
- NewOperands.push_back(I0->getOperand(O));
- continue;
- }
-
- // Create a new PHI in the successor block and populate it.
- auto *Op = I0->getOperand(O);
- assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
- auto *PN = PHINode::Create(Op->getType(), Insts.size(),
- Op->getName() + ".sink", &BBEnd->front());
- for (auto *I : Insts)
- PN->addIncoming(I->getOperand(O), I->getParent());
- NewOperands.push_back(PN);
- }
-
- // Arbitrarily use I0 as the new "common" instruction; remap its operands
- // and move it to the start of the successor block.
- for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
- I0->getOperandUse(O).set(NewOperands[O]);
- I0->moveBefore(&*BBEnd->getFirstInsertionPt());
-
- // Update metadata and IR flags, and merge debug locations.
- for (auto *I : Insts)
- if (I != I0) {
- // The debug location for the "common" instruction is the merged locations
- // of all the commoned instructions. We start with the original location
- // of the "common" instruction and iteratively merge each location in the
- // loop below.
- // This is an N-way merge, which will be inefficient if I0 is a CallInst.
- // However, as N-way merge for CallInst is rare, so we use simplified API
- // instead of using complex API for N-way merge.
- I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
- combineMetadataForCSE(I0, I, true);
- I0->andIRFlags(I);
- }
-
- if (!isa<StoreInst>(I0)) {
- // canSinkLastInstruction checked that all instructions were used by
- // one and only one PHI node. Find that now, RAUW it to our common
- // instruction and nuke it.
- assert(I0->hasOneUse());
- auto *PN = cast<PHINode>(*I0->user_begin());
- PN->replaceAllUsesWith(I0);
- PN->eraseFromParent();
- }
-
- // Finally nuke all instructions apart from the common instruction.
- for (auto *I : Insts)
- if (I != I0)
- I->eraseFromParent();
-
- return true;
-}
-
-namespace {
-
- // LockstepReverseIterator - Iterates through instructions
- // in a set of blocks in reverse order from the first non-terminator.
- // For example (assume all blocks have size n):
- // LockstepReverseIterator I([B1, B2, B3]);
- // *I-- = [B1[n], B2[n], B3[n]];
- // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
- // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
- // ...
- class LockstepReverseIterator {
- ArrayRef<BasicBlock*> Blocks;
- SmallVector<Instruction*,4> Insts;
- bool Fail;
-
- public:
- LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
- reset();
- }
-
- void reset() {
- Fail = false;
- Insts.clear();
- for (auto *BB : Blocks) {
- Instruction *Inst = BB->getTerminator();
- for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
- Inst = Inst->getPrevNode();
- if (!Inst) {
- // Block wasn't big enough.
- Fail = true;
- return;
- }
- Insts.push_back(Inst);
- }
- }
-
- bool isValid() const {
- return !Fail;
- }
-
- void operator--() {
- if (Fail)
- return;
- for (auto *&Inst : Insts) {
- for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
- Inst = Inst->getPrevNode();
- // Already at beginning of block.
- if (!Inst) {
- Fail = true;
- return;
- }
- }
- }
-
- ArrayRef<Instruction*> operator * () const {
- return Insts;
- }
- };
-
-} // end anonymous namespace
-
-/// Check whether BB's predecessors end with unconditional branches. If it is
-/// true, sink any common code from the predecessors to BB.
-/// We also allow one predecessor to end with conditional branch (but no more
-/// than one).
-static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
- // We support two situations:
- // (1) all incoming arcs are unconditional
- // (2) one incoming arc is conditional
- //
- // (2) is very common in switch defaults and
- // else-if patterns;
- //
- // if (a) f(1);
- // else if (b) f(2);
- //
- // produces:
- //
- // [if]
- // / \
- // [f(1)] [if]
- // | | \
- // | | |
- // | [f(2)]|
- // \ | /
- // [ end ]
- //
- // [end] has two unconditional predecessor arcs and one conditional. The
- // conditional refers to the implicit empty 'else' arc. This conditional
- // arc can also be caused by an empty default block in a switch.
- //
- // In this case, we attempt to sink code from all *unconditional* arcs.
- // If we can sink instructions from these arcs (determined during the scan
- // phase below) we insert a common successor for all unconditional arcs and
- // connect that to [end], to enable sinking:
- //
- // [if]
- // / \
- // [x(1)] [if]
- // | | \
- // | | \
- // | [x(2)] |
- // \ / |
- // [sink.split] |
- // \ /
- // [ end ]
- //
- SmallVector<BasicBlock*,4> UnconditionalPreds;
- Instruction *Cond = nullptr;
- for (auto *B : predecessors(BB)) {
- auto *T = B->getTerminator();
- if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
- UnconditionalPreds.push_back(B);
- else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
- Cond = T;
- else
- return false;
- }
- if (UnconditionalPreds.size() < 2)
- return false;
-
- bool Changed = false;
- // We take a two-step approach to tail sinking. First we scan from the end of
- // each block upwards in lockstep. If the n'th instruction from the end of each
- // block can be sunk, those instructions are added to ValuesToSink and we
- // carry on. If we can sink an instruction but need to PHI-merge some operands
- // (because they're not identical in each instruction) we add these to
- // PHIOperands.
- unsigned ScanIdx = 0;
- SmallPtrSet<Value*,4> InstructionsToSink;
- DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
- LockstepReverseIterator LRI(UnconditionalPreds);
- while (LRI.isValid() &&
- canSinkInstructions(*LRI, PHIOperands)) {
- LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
- << "\n");
- InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
- ++ScanIdx;
- --LRI;
- }
-
- auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
- unsigned NumPHIdValues = 0;
- for (auto *I : *LRI)
- for (auto *V : PHIOperands[I])
- if (InstructionsToSink.count(V) == 0)
- ++NumPHIdValues;
- LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
- unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
- if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
- NumPHIInsts++;
-
- return NumPHIInsts <= 1;
- };
-
- if (ScanIdx > 0 && Cond) {
- // Check if we would actually sink anything first! This mutates the CFG and
- // adds an extra block. The goal in doing this is to allow instructions that
- // couldn't be sunk before to be sunk - obviously, speculatable instructions
- // (such as trunc, add) can be sunk and predicated already. So we check that
- // we're going to sink at least one non-speculatable instruction.
- LRI.reset();
- unsigned Idx = 0;
- bool Profitable = false;
- while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
- if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
- Profitable = true;
- break;
- }
- --LRI;
- ++Idx;
- }
- if (!Profitable)
- return false;
-
- LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
- // We have a conditional edge and we're going to sink some instructions.
- // Insert a new block postdominating all blocks we're going to sink from.
- if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
- // Edges couldn't be split.
- return false;
- Changed = true;
- }
-
- // Now that we've analyzed all potential sinking candidates, perform the
- // actual sink. We iteratively sink the last non-terminator of the source
- // blocks into their common successor unless doing so would require too
- // many PHI instructions to be generated (currently only one PHI is allowed
- // per sunk instruction).
- //
- // We can use InstructionsToSink to discount values needing PHI-merging that will
- // actually be sunk in a later iteration. This allows us to be more
- // aggressive in what we sink. This does allow a false positive where we
- // sink presuming a later value will also be sunk, but stop half way through
- // and never actually sink it which means we produce more PHIs than intended.
- // This is unlikely in practice though.
- for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
- LLVM_DEBUG(dbgs() << "SINK: Sink: "
- << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
- << "\n");
-
- // Because we've sunk every instruction in turn, the current instruction to
- // sink is always at index 0.
- LRI.reset();
- if (!ProfitableToSinkInstruction(LRI)) {
- // Too many PHIs would be created.
- LLVM_DEBUG(
- dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
- break;
- }
-
- if (!sinkLastInstruction(UnconditionalPreds))
- return Changed;
- NumSinkCommons++;
- Changed = true;
- }
- return Changed;
-}
-
-/// Determine if we can hoist sink a sole store instruction out of a
-/// conditional block.
-///
-/// We are looking for code like the following:
-/// BrBB:
-/// store i32 %add, i32* %arrayidx2
-/// ... // No other stores or function calls (we could be calling a memory
-/// ... // function).
-/// %cmp = icmp ult %x, %y
-/// br i1 %cmp, label %EndBB, label %ThenBB
-/// ThenBB:
-/// store i32 %add5, i32* %arrayidx2
-/// br label EndBB
-/// EndBB:
-/// ...
-/// We are going to transform this into:
-/// BrBB:
-/// store i32 %add, i32* %arrayidx2
-/// ... //
-/// %cmp = icmp ult %x, %y
-/// %add.add5 = select i1 %cmp, i32 %add, %add5
-/// store i32 %add.add5, i32* %arrayidx2
-/// ...
-///
-/// \return The pointer to the value of the previous store if the store can be
-/// hoisted into the predecessor block. 0 otherwise.
-static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
- BasicBlock *StoreBB, BasicBlock *EndBB) {
- StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
- if (!StoreToHoist)
- return nullptr;
-
- // Volatile or atomic.
- if (!StoreToHoist->isSimple())
- return nullptr;
-
- Value *StorePtr = StoreToHoist->getPointerOperand();
-
- // Look for a store to the same pointer in BrBB.
- unsigned MaxNumInstToLookAt = 9;
- for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
- if (!MaxNumInstToLookAt)
- break;
- --MaxNumInstToLookAt;
-
- // Could be calling an instruction that affects memory like free().
- if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
- return nullptr;
-
- if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
- // Found the previous store make sure it stores to the same location.
- if (SI->getPointerOperand() == StorePtr)
- // Found the previous store, return its value operand.
- return SI->getValueOperand();
- return nullptr; // Unknown store.
- }
- }
-
- return nullptr;
-}
-
-/// Speculate a conditional basic block flattening the CFG.
-///
-/// Note that this is a very risky transform currently. Speculating
-/// instructions like this is most often not desirable. Instead, there is an MI
-/// pass which can do it with full awareness of the resource constraints.
-/// However, some cases are "obvious" and we should do directly. An example of
-/// this is speculating a single, reasonably cheap instruction.
-///
-/// There is only one distinct advantage to flattening the CFG at the IR level:
-/// it makes very common but simplistic optimizations such as are common in
-/// instcombine and the DAG combiner more powerful by removing CFG edges and
-/// modeling their effects with easier to reason about SSA value graphs.
-///
-///
-/// An illustration of this transform is turning this IR:
-/// \code
-/// BB:
-/// %cmp = icmp ult %x, %y
-/// br i1 %cmp, label %EndBB, label %ThenBB
-/// ThenBB:
-/// %sub = sub %x, %y
-/// br label BB2
-/// EndBB:
-/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
-/// ...
-/// \endcode
-///
-/// Into this IR:
-/// \code
-/// BB:
-/// %cmp = icmp ult %x, %y
-/// %sub = sub %x, %y
-/// %cond = select i1 %cmp, 0, %sub
-/// ...
-/// \endcode
-///
-/// \returns true if the conditional block is removed.
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const TargetTransformInfo &TTI) {
- // Be conservative for now. FP select instruction can often be expensive.
- Value *BrCond = BI->getCondition();
- if (isa<FCmpInst>(BrCond))
- return false;
-
- BasicBlock *BB = BI->getParent();
- BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
-
- // If ThenBB is actually on the false edge of the conditional branch, remember
- // to swap the select operands later.
- bool Invert = false;
- if (ThenBB != BI->getSuccessor(0)) {
- assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
- Invert = true;
- }
- assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
-
- // Keep a count of how many times instructions are used within ThenBB when
- // they are candidates for sinking into ThenBB. Specifically:
- // - They are defined in BB, and
- // - They have no side effects, and
- // - All of their uses are in ThenBB.
- SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
-
- SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
-
- unsigned SpeculationCost = 0;
- Value *SpeculatedStoreValue = nullptr;
- StoreInst *SpeculatedStore = nullptr;
- for (BasicBlock::iterator BBI = ThenBB->begin(),
- BBE = std::prev(ThenBB->end());
- BBI != BBE; ++BBI) {
- Instruction *I = &*BBI;
- // Skip debug info.
- if (isa<DbgInfoIntrinsic>(I)) {
- SpeculatedDbgIntrinsics.push_back(I);
- continue;
- }
-
- // Only speculatively execute a single instruction (not counting the
- // terminator) for now.
- ++SpeculationCost;
- if (SpeculationCost > 1)
- return false;
-
- // Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I) &&
- !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
- I, BB, ThenBB, EndBB))))
- return false;
- if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, TTI) >
- PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
- return false;
-
- // Store the store speculation candidate.
- if (SpeculatedStoreValue)
- SpeculatedStore = cast<StoreInst>(I);
-
- // Do not hoist the instruction if any of its operands are defined but not
- // used in BB. The transformation will prevent the operand from
- // being sunk into the use block.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
- Instruction *OpI = dyn_cast<Instruction>(*i);
- if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
- continue; // Not a candidate for sinking.
-
- ++SinkCandidateUseCounts[OpI];
- }
- }
-
- // Consider any sink candidates which are only used in ThenBB as costs for
- // speculation. Note, while we iterate over a DenseMap here, we are summing
- // and so iteration order isn't significant.
- for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
- I = SinkCandidateUseCounts.begin(),
- E = SinkCandidateUseCounts.end();
- I != E; ++I)
- if (I->first->hasNUses(I->second)) {
- ++SpeculationCost;
- if (SpeculationCost > 1)
- return false;
- }
-
- // Check that the PHI nodes can be converted to selects.
- bool HaveRewritablePHIs = false;
- for (PHINode &PN : EndBB->phis()) {
- Value *OrigV = PN.getIncomingValueForBlock(BB);
- Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
-
- // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
- // Skip PHIs which are trivial.
- if (ThenV == OrigV)
- continue;
-
- // Don't convert to selects if we could remove undefined behavior instead.
- if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
- passingValueIsAlwaysUndefined(ThenV, &PN))
- return false;
-
- HaveRewritablePHIs = true;
- ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
- ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
- if (!OrigCE && !ThenCE)
- continue; // Known safe and cheap.
-
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
- return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
- unsigned MaxCost =
- 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- if (OrigCost + ThenCost > MaxCost)
- return false;
-
- // Account for the cost of an unfolded ConstantExpr which could end up
- // getting expanded into Instructions.
- // FIXME: This doesn't account for how many operations are combined in the
- // constant expression.
- ++SpeculationCost;
- if (SpeculationCost > 1)
- return false;
- }
-
- // If there are no PHIs to process, bail early. This helps ensure idempotence
- // as well.
- if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
- return false;
-
- // If we get here, we can hoist the instruction and if-convert.
- LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
-
- // Insert a select of the value of the speculated store.
- if (SpeculatedStoreValue) {
- IRBuilder<NoFolder> Builder(BI);
- Value *TrueV = SpeculatedStore->getValueOperand();
- Value *FalseV = SpeculatedStoreValue;
- if (Invert)
- std::swap(TrueV, FalseV);
- Value *S = Builder.CreateSelect(
- BrCond, TrueV, FalseV, "spec.store.select", BI);
- SpeculatedStore->setOperand(0, S);
- SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
- SpeculatedStore->getDebugLoc());
- }
-
- // Metadata can be dependent on the condition we are hoisting above.
- // Conservatively strip all metadata on the instruction.
- for (auto &I : *ThenBB)
- I.dropUnknownNonDebugMetadata();
-
- // Hoist the instructions.
- BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
- ThenBB->begin(), std::prev(ThenBB->end()));
-
- // Insert selects and rewrite the PHI operands.
- IRBuilder<NoFolder> Builder(BI);
- for (PHINode &PN : EndBB->phis()) {
- unsigned OrigI = PN.getBasicBlockIndex(BB);
- unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
- Value *OrigV = PN.getIncomingValue(OrigI);
- Value *ThenV = PN.getIncomingValue(ThenI);
-
- // Skip PHIs which are trivial.
- if (OrigV == ThenV)
- continue;
-
- // Create a select whose true value is the speculatively executed value and
- // false value is the preexisting value. Swap them if the branch
- // destinations were inverted.
- Value *TrueV = ThenV, *FalseV = OrigV;
- if (Invert)
- std::swap(TrueV, FalseV);
- Value *V = Builder.CreateSelect(
- BrCond, TrueV, FalseV, "spec.select", BI);
- PN.setIncomingValue(OrigI, V);
- PN.setIncomingValue(ThenI, V);
- }
-
- // Remove speculated dbg intrinsics.
- // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
- // dbg value for the different flows and inserting it after the select.
- for (Instruction *I : SpeculatedDbgIntrinsics)
- I->eraseFromParent();
-
- ++NumSpeculations;
- return true;
-}
-
-/// Return true if we can thread a branch across this block.
-static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
- unsigned Size = 0;
-
- for (Instruction &I : BB->instructionsWithoutDebug()) {
- if (Size > 10)
- return false; // Don't clone large BB's.
- ++Size;
-
- // We can only support instructions that do not define values that are
- // live outside of the current basic block.
- for (User *U : I.users()) {
- Instruction *UI = cast<Instruction>(U);
- if (UI->getParent() != BB || isa<PHINode>(UI))
- return false;
- }
-
- // Looks ok, continue checking.
- }
-
- return true;
-}
-
-/// If we have a conditional branch on a PHI node value that is defined in the
-/// same block as the branch and if any PHI entries are constants, thread edges
-/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
- AssumptionCache *AC) {
- BasicBlock *BB = BI->getParent();
- PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
- // NOTE: we currently cannot transform this case if the PHI node is used
- // outside of the block.
- if (!PN || PN->getParent() != BB || !PN->hasOneUse())
- return false;
-
- // Degenerate case of a single entry PHI.
- if (PN->getNumIncomingValues() == 1) {
- FoldSingleEntryPHINodes(PN->getParent());
- return true;
- }
-
- // Now we know that this block has multiple preds and two succs.
- if (!BlockIsSimpleEnoughToThreadThrough(BB))
- return false;
-
- // Can't fold blocks that contain noduplicate or convergent calls.
- if (any_of(*BB, [](const Instruction &I) {
- const CallInst *CI = dyn_cast<CallInst>(&I);
- return CI && (CI->cannotDuplicate() || CI->isConvergent());
- }))
- return false;
-
- // Okay, this is a simple enough basic block. See if any phi values are
- // constants.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (!CB || !CB->getType()->isIntegerTy(1))
- continue;
-
- // Okay, we now know that all edges from PredBB should be revectored to
- // branch to RealDest.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
- BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
-
- if (RealDest == BB)
- continue; // Skip self loops.
- // Skip if the predecessor's terminator is an indirect branch.
- if (isa<IndirectBrInst>(PredBB->getTerminator()))
- continue;
-
- // The dest block might have PHI nodes, other predecessors and other
- // difficult cases. Instead of being smart about this, just insert a new
- // block that jumps to the destination block, effectively splitting
- // the edge we are about to create.
- BasicBlock *EdgeBB =
- BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
- RealDest->getParent(), RealDest);
- BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
- CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
-
- // Update PHI nodes.
- AddPredecessorToBlock(RealDest, EdgeBB, BB);
-
- // BB may have instructions that are being threaded over. Clone these
- // instructions into EdgeBB. We know that there will be no uses of the
- // cloned instructions outside of EdgeBB.
- BasicBlock::iterator InsertPt = EdgeBB->begin();
- DenseMap<Value *, Value *> TranslateMap; // Track translated values.
- for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
- continue;
- }
- // Clone the instruction.
- Instruction *N = BBI->clone();
- if (BBI->hasName())
- N->setName(BBI->getName() + ".c");
-
- // Update operands due to translation.
- for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) {
- DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
- if (PI != TranslateMap.end())
- *i = PI->second;
- }
-
- // Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
- if (!BBI->use_empty())
- TranslateMap[&*BBI] = V;
- if (!N->mayHaveSideEffects()) {
- N->deleteValue(); // Instruction folded away, don't need actual inst
- N = nullptr;
- }
- } else {
- if (!BBI->use_empty())
- TranslateMap[&*BBI] = N;
- }
- // Insert the new instruction into its new home.
- if (N)
- EdgeBB->getInstList().insert(InsertPt, N);
-
- // Register the new instruction with the assumption cache if necessary.
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- }
-
- // Loop over all of the edges from PredBB to BB, changing them to branch
- // to EdgeBB instead.
- Instruction *PredBBTI = PredBB->getTerminator();
- for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
- if (PredBBTI->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
- PredBBTI->setSuccessor(i, EdgeBB);
- }
-
- // Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) || true;
- }
-
- return false;
-}
-
-/// Given a BB that starts with the specified two-entry PHI node,
-/// see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
- const DataLayout &DL) {
- // Ok, this is a two entry PHI node. Check to see if this is a simple "if
- // statement", which has a very simple dominance structure. Basically, we
- // are trying to find the condition that is being branched on, which
- // subsequently causes this merge to happen. We really want control
- // dependence information for this check, but simplifycfg can't keep it up
- // to date, and this catches most of the cases we care about anyway.
- BasicBlock *BB = PN->getParent();
- const Function *Fn = BB->getParent();
- if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
- return false;
-
- BasicBlock *IfTrue, *IfFalse;
- Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
- if (!IfCond ||
- // Don't bother if the branch will be constant folded trivially.
- isa<ConstantInt>(IfCond))
- return false;
-
- // Okay, we found that we can merge this two-entry phi node into a select.
- // Doing so would require us to fold *all* two entry phi nodes in this block.
- // At some point this becomes non-profitable (particularly if the target
- // doesn't support cmov's). Only do this transformation if there are two or
- // fewer PHI nodes in this block.
- unsigned NumPhis = 0;
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
- if (NumPhis > 2)
- return false;
-
- // Loop over the PHI's seeing if we can promote them all to select
- // instructions. While we are at it, keep track of the instructions
- // that need to be moved to the dominating block.
- SmallPtrSet<Instruction *, 4> AggressiveInsts;
- unsigned MaxCostVal0 = PHINodeFoldingThreshold,
- MaxCostVal1 = PHINodeFoldingThreshold;
- MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
- MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
-
- for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
- PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- continue;
- }
-
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
- MaxCostVal0, TTI) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
- MaxCostVal1, TTI))
- return false;
- }
-
- // If we folded the first phi, PN dangles at this point. Refresh it. If
- // we ran out of PHIs then we simplified them all.
- PN = dyn_cast<PHINode>(BB->begin());
- if (!PN)
- return true;
-
- // Don't fold i1 branches on PHIs which contain binary operators. These can
- // often be turned into switches and other things.
- if (PN->getType()->isIntegerTy(1) &&
- (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
- isa<BinaryOperator>(PN->getIncomingValue(1)) ||
- isa<BinaryOperator>(IfCond)))
- return false;
-
- // If all PHI nodes are promotable, check to make sure that all instructions
- // in the predecessor blocks can be promoted as well. If not, we won't be able
- // to get rid of the control flow, so it's not worth promoting to select
- // instructions.
- BasicBlock *DomBlock = nullptr;
- BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
- BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
- if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
- IfBlock1 = nullptr;
- } else {
- DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
- // This is not an aggressive instruction that we can promote.
- // Because of this, we won't be able to get rid of the control flow, so
- // the xform is not worth it.
- return false;
- }
- }
-
- if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
- IfBlock2 = nullptr;
- } else {
- DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
- // This is not an aggressive instruction that we can promote.
- // Because of this, we won't be able to get rid of the control flow, so
- // the xform is not worth it.
- return false;
- }
- }
-
- LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
- << " T: " << IfTrue->getName()
- << " F: " << IfFalse->getName() << "\n");
-
- // If we can still promote the PHI nodes after this gauntlet of tests,
- // do all of the PHI's now.
- Instruction *InsertPt = DomBlock->getTerminator();
- IRBuilder<NoFolder> Builder(InsertPt);
-
- // Move all 'aggressive' instructions, which are defined in the
- // conditional parts of the if's up to the dominating block.
- if (IfBlock1)
- hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
- if (IfBlock2)
- hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
-
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
- // Change the PHI node into a select instruction.
- Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
- Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
-
- Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
- PN->replaceAllUsesWith(Sel);
- Sel->takeName(PN);
- PN->eraseFromParent();
- }
-
- // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
- // has been flattened. Change DomBlock to jump directly to our new block to
- // avoid other simplifycfg's kicking in on the diamond.
- Instruction *OldTI = DomBlock->getTerminator();
- Builder.SetInsertPoint(OldTI);
- Builder.CreateBr(BB);
- OldTI->eraseFromParent();
- return true;
-}
-
-/// If we found a conditional branch that goes to two returning blocks,
-/// try to merge them together into one return,
-/// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
- IRBuilder<> &Builder) {
- assert(BI->isConditional() && "Must be a conditional branch");
- BasicBlock *TrueSucc = BI->getSuccessor(0);
- BasicBlock *FalseSucc = BI->getSuccessor(1);
- ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
- ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
-
- // Check to ensure both blocks are empty (just a return) or optionally empty
- // with PHI nodes. If there are other instructions, merging would cause extra
- // computation on one path or the other.
- if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
- return false;
- if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
- return false;
-
- Builder.SetInsertPoint(BI);
- // Okay, we found a branch that is going to two return nodes. If
- // there is no return value for this function, just change the
- // branch into a return.
- if (FalseRet->getNumOperands() == 0) {
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
- Builder.CreateRetVoid();
- EraseTerminatorAndDCECond(BI);
- return true;
- }
-
- // Otherwise, figure out what the true and false return values are
- // so we can insert a new select instruction.
- Value *TrueValue = TrueRet->getReturnValue();
- Value *FalseValue = FalseRet->getReturnValue();
-
- // Unwrap any PHI nodes in the return blocks.
- if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
- if (TVPN->getParent() == TrueSucc)
- TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
- if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
- if (FVPN->getParent() == FalseSucc)
- FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
-
- // In order for this transformation to be safe, we must be able to
- // unconditionally execute both operands to the return. This is
- // normally the case, but we could have a potentially-trapping
- // constant expression that prevents this transformation from being
- // safe.
- if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
- if (TCV->canTrap())
- return false;
- if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
- if (FCV->canTrap())
- return false;
-
- // Okay, we collected all the mapped values and checked them for sanity, and
- // defined to really do this transformation. First, update the CFG.
- TrueSucc->removePredecessor(BI->getParent());
- FalseSucc->removePredecessor(BI->getParent());
-
- // Insert select instructions where needed.
- Value *BrCond = BI->getCondition();
- if (TrueValue) {
- // Insert a select if the results differ.
- if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
- } else if (isa<UndefValue>(TrueValue)) {
- TrueValue = FalseValue;
- } else {
- TrueValue =
- Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
- }
- }
-
- Value *RI =
- !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
-
- (void)RI;
-
- LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
- << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
- << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
-
- EraseTerminatorAndDCECond(BI);
-
- return true;
-}
-
-/// Return true if the given instruction is available
-/// in its predecessor block. If yes, the instruction will be removed.
-static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
- if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
- return false;
- for (Instruction &I : *PB) {
- Instruction *PBI = &I;
- // Check whether Inst and PBI generate the same value.
- if (Inst->isIdenticalTo(PBI)) {
- Inst->replaceAllUsesWith(PBI);
- Inst->eraseFromParent();
- return true;
- }
- }
- return false;
-}
-
-/// Return true if either PBI or BI has branch weight available, and store
-/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
-/// not have branch weight, use 1:1 as its weight.
-static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
- uint64_t &PredTrueWeight,
- uint64_t &PredFalseWeight,
- uint64_t &SuccTrueWeight,
- uint64_t &SuccFalseWeight) {
- bool PredHasWeights =
- PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
- bool SuccHasWeights =
- BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
- if (PredHasWeights || SuccHasWeights) {
- if (!PredHasWeights)
- PredTrueWeight = PredFalseWeight = 1;
- if (!SuccHasWeights)
- SuccTrueWeight = SuccFalseWeight = 1;
- return true;
- } else {
- return false;
- }
-}
-
-/// If this basic block is simple enough, and if a predecessor branches to us
-/// and one of our successors, fold the block into the predecessor and use
-/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
- unsigned BonusInstThreshold) {
- BasicBlock *BB = BI->getParent();
-
- const unsigned PredCount = pred_size(BB);
-
- Instruction *Cond = nullptr;
- if (BI->isConditional())
- Cond = dyn_cast<Instruction>(BI->getCondition());
- else {
- // For unconditional branch, check for a simple CFG pattern, where
- // BB has a single predecessor and BB's successor is also its predecessor's
- // successor. If such pattern exists, check for CSE between BB and its
- // predecessor.
- if (BasicBlock *PB = BB->getSinglePredecessor())
- if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
- if (PBI->isConditional() &&
- (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
- BI->getSuccessor(0) == PBI->getSuccessor(1))) {
- for (auto I = BB->instructionsWithoutDebug().begin(),
- E = BB->instructionsWithoutDebug().end();
- I != E;) {
- Instruction *Curr = &*I++;
- if (isa<CmpInst>(Curr)) {
- Cond = Curr;
- break;
- }
- // Quit if we can't remove this instruction.
- if (!tryCSEWithPredecessor(Curr, PB))
- return false;
- }
- }
-
- if (!Cond)
- return false;
- }
-
- if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
- Cond->getParent() != BB || !Cond->hasOneUse())
- return false;
-
- // Make sure the instruction after the condition is the cond branch.
- BasicBlock::iterator CondIt = ++Cond->getIterator();
-
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(CondIt))
- ++CondIt;
-
- if (&*CondIt != BI)
- return false;
-
- // Only allow this transformation if computing the condition doesn't involve
- // too many instructions and these involved instructions can be executed
- // unconditionally. We denote all involved instructions except the condition
- // as "bonus instructions", and only allow this transformation when the
- // number of the bonus instructions we'll need to create when cloning into
- // each predecessor does not exceed a certain threshold.
- unsigned NumBonusInsts = 0;
- for (auto I = BB->begin(); Cond != &*I; ++I) {
- // Ignore dbg intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
- return false;
- // I has only one use and can be executed unconditionally.
- Instruction *User = dyn_cast<Instruction>(I->user_back());
- if (User == nullptr || User->getParent() != BB)
- return false;
- // I is used in the same BB. Since BI uses Cond and doesn't have more slots
- // to use any other instruction, User must be an instruction between next(I)
- // and Cond.
-
- // Account for the cost of duplicating this instruction into each
- // predecessor.
- NumBonusInsts += PredCount;
- // Early exits once we reach the limit.
- if (NumBonusInsts > BonusInstThreshold)
- return false;
- }
-
- // Cond is known to be a compare or binary operator. Check to make sure that
- // neither operand is a potentially-trapping constant expression.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
- if (CE->canTrap())
- return false;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
- if (CE->canTrap())
- return false;
-
- // Finally, don't infinitely unroll conditional loops.
- BasicBlock *TrueDest = BI->getSuccessor(0);
- BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
- if (TrueDest == BB || FalseDest == BB)
- return false;
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBlock = *PI;
- BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
-
- // Check that we have two conditional branches. If there is a PHI node in
- // the common successor, verify that the same value flows in from both
- // blocks.
- SmallVector<PHINode *, 4> PHIs;
- if (!PBI || PBI->isUnconditional() ||
- (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
- (!BI->isConditional() &&
- !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
- continue;
-
- // Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
- bool InvertPredCond = false;
-
- if (BI->isConditional()) {
- if (PBI->getSuccessor(0) == TrueDest) {
- Opc = Instruction::Or;
- } else if (PBI->getSuccessor(1) == FalseDest) {
- Opc = Instruction::And;
- } else if (PBI->getSuccessor(0) == FalseDest) {
- Opc = Instruction::And;
- InvertPredCond = true;
- } else if (PBI->getSuccessor(1) == TrueDest) {
- Opc = Instruction::Or;
- InvertPredCond = true;
- } else {
- continue;
- }
- } else {
- if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- IRBuilder<> Builder(PBI);
-
- // If we need to invert the condition in the pred block to match, do so now.
- if (InvertPredCond) {
- Value *NewCond = PBI->getCondition();
-
- if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
- CmpInst *CI = cast<CmpInst>(NewCond);
- CI->setPredicate(CI->getInversePredicate());
- } else {
- NewCond =
- Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
- }
-
- PBI->setCondition(NewCond);
- PBI->swapSuccessors();
- }
-
- // If we have bonus instructions, clone them into the predecessor block.
- // Note that there may be multiple predecessor blocks, so we cannot move
- // bonus instructions to a predecessor block.
- ValueToValueMapTy VMap; // maps original values to cloned values
- // We already make sure Cond is the last instruction before BI. Therefore,
- // all instructions before Cond other than DbgInfoIntrinsic are bonus
- // instructions.
- for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
- if (isa<DbgInfoIntrinsic>(BonusInst))
- continue;
- Instruction *NewBonusInst = BonusInst->clone();
- RemapInstruction(NewBonusInst, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- VMap[&*BonusInst] = NewBonusInst;
-
- // If we moved a load, we cannot any longer claim any knowledge about
- // its potential value. The previous information might have been valid
- // only given the branch precondition.
- // For an analogous reason, we must also drop all the metadata whose
- // semantics we don't understand.
- NewBonusInst->dropUnknownNonDebugMetadata();
-
- PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
- NewBonusInst->takeName(&*BonusInst);
- BonusInst->setName(BonusInst->getName() + ".old");
- }
-
- // Clone Cond into the predecessor basic block, and or/and the
- // two conditions together.
- Instruction *CondInPred = Cond->clone();
- RemapInstruction(CondInPred, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
- CondInPred->takeName(Cond);
- Cond->setName(CondInPred->getName() + ".old");
-
- if (BI->isConditional()) {
- Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
- PBI->setCondition(NewCond);
-
- uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- bool HasWeights =
- extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
- SuccTrueWeight, SuccFalseWeight);
- SmallVector<uint64_t, 8> NewWeights;
-
- if (PBI->getSuccessor(0) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, BB, FalseDest
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
- // TrueWeight for PBI * FalseWeight for BI.
- // We assume that total weights of a BranchInst can fit into 32 bits.
- // Therefore, we will not have overflow using 64-bit arithmetic.
- NewWeights.push_back(PredFalseWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredTrueWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(0, TrueDest);
- }
- if (PBI->getSuccessor(1) == BB) {
- if (HasWeights) {
- // PBI: br i1 %x, TrueDest, BB
- // BI: br i1 %y, TrueDest, FalseDest
- // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
- // FalseWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredFalseWeight * SuccTrueWeight);
- // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
- NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
- }
- AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
- PBI->setSuccessor(1, FalseDest);
- }
- if (NewWeights.size() == 2) {
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(NewWeights);
-
- SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
- NewWeights.end());
- setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
- } else
- PBI->setMetadata(LLVMContext::MD_prof, nullptr);
- } else {
- // Update PHI nodes in the common successors.
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- ConstantInt *PBI_C = cast<ConstantInt>(
- PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
- assert(PBI_C->getType()->isIntegerTy(1));
- Instruction *MergedCond = nullptr;
- if (PBI->getSuccessor(0) == TrueDest) {
- // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
- // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
- // is false: !PBI_Cond and BI_Value
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
- "and.cond"));
- if (PBI_C->isOne())
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
- } else {
- // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
- // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
- // is false: PBI_Cond and BI_Value
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
- if (PBI_C->isOne()) {
- Instruction *NotCond = cast<Instruction>(
- Builder.CreateNot(PBI->getCondition(), "not.cond"));
- MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::Or, NotCond, MergedCond, "or.cond"));
- }
- }
- // Update PHI Node.
- PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
- }
-
- // PBI is changed to branch to TrueDest below. Remove itself from
- // potential phis from all other successors.
- if (MSSAU)
- MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
-
- // Change PBI from Conditional to Unconditional.
- BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorAndDCECond(PBI, MSSAU);
- PBI = New_PBI;
- }
-
- // If BI was a loop latch, it may have had associated loop metadata.
- // We need to copy it to the new latch, that is, PBI.
- if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
- PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
-
- // TODO: If BB is reachable from all paths through PredBlock, then we
- // could replace PBI's branch probabilities with BI's.
-
- // Copy any debug value intrinsics into the end of PredBlock.
- for (Instruction &I : *BB)
- if (isa<DbgInfoIntrinsic>(I))
- I.clone()->insertBefore(PBI);
-
- return true;
- }
- return false;
-}
-
-// If there is only one store in BB1 and BB2, return it, otherwise return
-// nullptr.
-static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
- StoreInst *S = nullptr;
- for (auto *BB : {BB1, BB2}) {
- if (!BB)
- continue;
- for (auto &I : *BB)
- if (auto *SI = dyn_cast<StoreInst>(&I)) {
- if (S)
- // Multiple stores seen.
- return nullptr;
- else
- S = SI;
- }
- }
- return S;
-}
-
-static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
- Value *AlternativeV = nullptr) {
- // PHI is going to be a PHI node that allows the value V that is defined in
- // BB to be referenced in BB's only successor.
- //
- // If AlternativeV is nullptr, the only value we care about in PHI is V. It
- // doesn't matter to us what the other operand is (it'll never get used). We
- // could just create a new PHI with an undef incoming value, but that could
- // increase register pressure if EarlyCSE/InstCombine can't fold it with some
- // other PHI. So here we directly look for some PHI in BB's successor with V
- // as an incoming operand. If we find one, we use it, else we create a new
- // one.
- //
- // If AlternativeV is not nullptr, we care about both incoming values in PHI.
- // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
- // where OtherBB is the single other predecessor of BB's only successor.
- PHINode *PHI = nullptr;
- BasicBlock *Succ = BB->getSingleSuccessor();
-
- for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
- if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
- PHI = cast<PHINode>(I);
- if (!AlternativeV)
- break;
-
- assert(Succ->hasNPredecessors(2));
- auto PredI = pred_begin(Succ);
- BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
- if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
- break;
- PHI = nullptr;
- }
- if (PHI)
- return PHI;
-
- // If V is not an instruction defined in BB, just return it.
- if (!AlternativeV &&
- (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
- return V;
-
- PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
- PHI->addIncoming(V, BB);
- for (BasicBlock *PredBB : predecessors(Succ))
- if (PredBB != BB)
- PHI->addIncoming(
- AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
- return PHI;
-}
-
-static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
- BasicBlock *QTB, BasicBlock *QFB,
- BasicBlock *PostBB, Value *Address,
- bool InvertPCond, bool InvertQCond,
- const DataLayout &DL) {
- auto IsaBitcastOfPointerType = [](const Instruction &I) {
- return Operator::getOpcode(&I) == Instruction::BitCast &&
- I.getType()->isPointerTy();
- };
-
- // If we're not in aggressive mode, we only optimize if we have some
- // confidence that by optimizing we'll allow P and/or Q to be if-converted.
- auto IsWorthwhile = [&](BasicBlock *BB) {
- if (!BB)
- return true;
- // Heuristic: if the block can be if-converted/phi-folded and the
- // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
- // thread this store.
- unsigned N = 0;
- for (auto &I : BB->instructionsWithoutDebug()) {
- // Cheap instructions viable for folding.
- if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
- isa<StoreInst>(I))
- ++N;
- // Free instructions.
- else if (I.isTerminator() || IsaBitcastOfPointerType(I))
- continue;
- else
- return false;
- }
- // The store we want to merge is counted in N, so add 1 to make sure
- // we're counting the instructions that would be left.
- return N <= (PHINodeFoldingThreshold + 1);
- };
-
- if (!MergeCondStoresAggressively &&
- (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) ||
- !IsWorthwhile(QFB)))
- return false;
-
- // For every pointer, there must be exactly two stores, one coming from
- // PTB or PFB, and the other from QTB or QFB. We don't support more than one
- // store (to any address) in PTB,PFB or QTB,QFB.
- // FIXME: We could relax this restriction with a bit more work and performance
- // testing.
- StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
- StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
- if (!PStore || !QStore)
- return false;
-
- // Now check the stores are compatible.
- if (!QStore->isUnordered() || !PStore->isUnordered())
- return false;
-
- // Check that sinking the store won't cause program behavior changes. Sinking
- // the store out of the Q blocks won't change any behavior as we're sinking
- // from a block to its unconditional successor. But we're moving a store from
- // the P blocks down through the middle block (QBI) and past both QFB and QTB.
- // So we need to check that there are no aliasing loads or stores in
- // QBI, QTB and QFB. We also need to check there are no conflicting memory
- // operations between PStore and the end of its parent block.
- //
- // The ideal way to do this is to query AliasAnalysis, but we don't
- // preserve AA currently so that is dangerous. Be super safe and just
- // check there are no other memory operations at all.
- for (auto &I : *QFB->getSinglePredecessor())
- if (I.mayReadOrWriteMemory())
- return false;
- for (auto &I : *QFB)
- if (&I != QStore && I.mayReadOrWriteMemory())
- return false;
- if (QTB)
- for (auto &I : *QTB)
- if (&I != QStore && I.mayReadOrWriteMemory())
- return false;
- for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
- I != E; ++I)
- if (&*I != PStore && I->mayReadOrWriteMemory())
- return false;
-
- // If PostBB has more than two predecessors, we need to split it so we can
- // sink the store.
- if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
- // We know that QFB's only successor is PostBB. And QFB has a single
- // predecessor. If QTB exists, then its only successor is also PostBB.
- // If QTB does not exist, then QFB's only predecessor has a conditional
- // branch to QFB and PostBB.
- BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
- BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred},
- "condstore.split");
- if (!NewBB)
- return false;
- PostBB = NewBB;
- }
-
- // OK, we're going to sink the stores to PostBB. The store has to be
- // conditional though, so first create the predicate.
- Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
- ->getCondition();
- Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
- ->getCondition();
-
- Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
- PStore->getParent());
- Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
- QStore->getParent(), PPHI);
-
- IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
-
- Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
- Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
-
- if (InvertPCond)
- PPred = QB.CreateNot(PPred);
- if (InvertQCond)
- QPred = QB.CreateNot(QPred);
- Value *CombinedPred = QB.CreateOr(PPred, QPred);
-
- auto *T =
- SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
- QB.SetInsertPoint(T);
- StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
- AAMDNodes AAMD;
- PStore->getAAMetadata(AAMD, /*Merge=*/false);
- PStore->getAAMetadata(AAMD, /*Merge=*/true);
- SI->setAAMetadata(AAMD);
- unsigned PAlignment = PStore->getAlignment();
- unsigned QAlignment = QStore->getAlignment();
- unsigned TypeAlignment =
- DL.getABITypeAlignment(SI->getValueOperand()->getType());
- unsigned MinAlignment;
- unsigned MaxAlignment;
- std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment);
- // Choose the minimum alignment. If we could prove both stores execute, we
- // could use biggest one. In this case, though, we only know that one of the
- // stores executes. And we don't know it's safe to take the alignment from a
- // store that doesn't execute.
- if (MinAlignment != 0) {
- // Choose the minimum of all non-zero alignments.
- SI->setAlignment(MinAlignment);
- } else if (MaxAlignment != 0) {
- // Choose the minimal alignment between the non-zero alignment and the ABI
- // default alignment for the type of the stored value.
- SI->setAlignment(std::min(MaxAlignment, TypeAlignment));
- } else {
- // If both alignments are zero, use ABI default alignment for the type of
- // the stored value.
- SI->setAlignment(TypeAlignment);
- }
-
- QStore->eraseFromParent();
- PStore->eraseFromParent();
-
- return true;
-}
-
-static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
- const DataLayout &DL) {
- // The intention here is to find diamonds or triangles (see below) where each
- // conditional block contains a store to the same address. Both of these
- // stores are conditional, so they can't be unconditionally sunk. But it may
- // be profitable to speculatively sink the stores into one merged store at the
- // end, and predicate the merged store on the union of the two conditions of
- // PBI and QBI.
- //
- // This can reduce the number of stores executed if both of the conditions are
- // true, and can allow the blocks to become small enough to be if-converted.
- // This optimization will also chain, so that ladders of test-and-set
- // sequences can be if-converted away.
- //
- // We only deal with simple diamonds or triangles:
- //
- // PBI or PBI or a combination of the two
- // / \ | \
- // PTB PFB | PFB
- // \ / | /
- // QBI QBI
- // / \ | \
- // QTB QFB | QFB
- // \ / | /
- // PostBB PostBB
- //
- // We model triangles as a type of diamond with a nullptr "true" block.
- // Triangles are canonicalized so that the fallthrough edge is represented by
- // a true condition, as in the diagram above.
- BasicBlock *PTB = PBI->getSuccessor(0);
- BasicBlock *PFB = PBI->getSuccessor(1);
- BasicBlock *QTB = QBI->getSuccessor(0);
- BasicBlock *QFB = QBI->getSuccessor(1);
- BasicBlock *PostBB = QFB->getSingleSuccessor();
-
- // Make sure we have a good guess for PostBB. If QTB's only successor is
- // QFB, then QFB is a better PostBB.
- if (QTB->getSingleSuccessor() == QFB)
- PostBB = QFB;
-
- // If we couldn't find a good PostBB, stop.
- if (!PostBB)
- return false;
-
- bool InvertPCond = false, InvertQCond = false;
- // Canonicalize fallthroughs to the true branches.
- if (PFB == QBI->getParent()) {
- std::swap(PFB, PTB);
- InvertPCond = true;
- }
- if (QFB == PostBB) {
- std::swap(QFB, QTB);
- InvertQCond = true;
- }
-
- // From this point on we can assume PTB or QTB may be fallthroughs but PFB
- // and QFB may not. Model fallthroughs as a nullptr block.
- if (PTB == QBI->getParent())
- PTB = nullptr;
- if (QTB == PostBB)
- QTB = nullptr;
-
- // Legality bailouts. We must have at least the non-fallthrough blocks and
- // the post-dominating block, and the non-fallthroughs must only have one
- // predecessor.
- auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
- return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
- };
- if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
- !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
- return false;
- if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
- (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
- return false;
- if (!QBI->getParent()->hasNUses(2))
- return false;
-
- // OK, this is a sequence of two diamonds or triangles.
- // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
- SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
- for (auto *BB : {PTB, PFB}) {
- if (!BB)
- continue;
- for (auto &I : *BB)
- if (StoreInst *SI = dyn_cast<StoreInst>(&I))
- PStoreAddresses.insert(SI->getPointerOperand());
- }
- for (auto *BB : {QTB, QFB}) {
- if (!BB)
- continue;
- for (auto &I : *BB)
- if (StoreInst *SI = dyn_cast<StoreInst>(&I))
- QStoreAddresses.insert(SI->getPointerOperand());
- }
-
- set_intersect(PStoreAddresses, QStoreAddresses);
- // set_intersect mutates PStoreAddresses in place. Rename it here to make it
- // clear what it contains.
- auto &CommonAddresses = PStoreAddresses;
-
- bool Changed = false;
- for (auto *Address : CommonAddresses)
- Changed |= mergeConditionalStoreToAddress(
- PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL);
- return Changed;
-}
-
-/// If we have a conditional branch as a predecessor of another block,
-/// this function tries to simplify it. We know
-/// that PBI and BI are both conditional branches, and BI is in one of the
-/// successor blocks of PBI - PBI branches to BI.
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
- const DataLayout &DL) {
- assert(PBI->isConditional() && BI->isConditional());
- BasicBlock *BB = BI->getParent();
-
- // If this block ends with a branch instruction, and if there is a
- // predecessor that ends on a branch of the same condition, make
- // this conditional branch redundant.
- if (PBI->getCondition() == BI->getCondition() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- // Okay, the outcome of this conditional branch is statically
- // knowable. If this block had a single pred, handle specially.
- if (BB->getSinglePredecessor()) {
- // Turn this into a branch on constant.
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(
- ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
- return true; // Nuke the branch on constant.
- }
-
- // Otherwise, if there are multiple predecessors, insert a PHI that merges
- // in the constant and simplify the block result. Subsequent passes of
- // simplifycfg will thread the block.
- if (BlockIsSimpleEnoughToThreadThrough(BB)) {
- pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- PHINode *NewPN = PHINode::Create(
- Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
- BI->getCondition()->getName() + ".pr", &BB->front());
- // Okay, we're going to insert the PHI node. Since PBI is not the only
- // predecessor, compute the PHI'd conditional value for all of the preds.
- // Any predecessor where the condition is not computable we keep symbolic.
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
- PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(
- ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
- P);
- } else {
- NewPN->addIncoming(BI->getCondition(), P);
- }
- }
-
- BI->setCondition(NewPN);
- return true;
- }
- }
-
- if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
- if (CE->canTrap())
- return false;
-
- // If both branches are conditional and both contain stores to the same
- // address, remove the stores from the conditionals and create a conditional
- // merged store at the end.
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL))
- return true;
-
- // If this is a conditional branch in an empty block, and if any
- // predecessors are a conditional branch to one of our destinations,
- // fold the conditions into logical ops and one cond br.
-
- // Ignore dbg intrinsics.
- if (&*BB->instructionsWithoutDebug().begin() != BI)
- return false;
-
- int PBIOp, BIOp;
- if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
- PBIOp = 0;
- BIOp = 0;
- } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
- PBIOp = 0;
- BIOp = 1;
- } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
- PBIOp = 1;
- BIOp = 0;
- } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
- PBIOp = 1;
- BIOp = 1;
- } else {
- return false;
- }
-
- // Check to make sure that the other destination of this branch
- // isn't BB itself. If so, this is an infinite loop that will
- // keep getting unwound.
- if (PBI->getSuccessor(PBIOp) == BB)
- return false;
-
- // Do not perform this transformation if it would require
- // insertion of a large number of select instructions. For targets
- // without predication/cmovs, this is a big pessimization.
-
- // Also do not perform this transformation if any phi node in the common
- // destination block can trap when reached by BB or PBB (PR17073). In that
- // case, it would be unsafe to hoist the operation into a select instruction.
-
- BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
- unsigned NumPhis = 0;
- for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
- ++II, ++NumPhis) {
- if (NumPhis > 2) // Disable this xform.
- return false;
-
- PHINode *PN = cast<PHINode>(II);
- Value *BIV = PN->getIncomingValueForBlock(BB);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
- if (CE->canTrap())
- return false;
-
- unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
- Value *PBIV = PN->getIncomingValue(PBBIdx);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
- if (CE->canTrap())
- return false;
- }
-
- // Finally, if everything is ok, fold the branches to logical ops.
- BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
-
- LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
- << "AND: " << *BI->getParent());
-
- // If OtherDest *is* BB, then BB is a basic block with a single conditional
- // branch in it, where one edge (OtherDest) goes back to itself but the other
- // exits. We don't *know* that the program avoids the infinite loop
- // (even though that seems likely). If we do this xform naively, we'll end up
- // recursively unpeeling the loop. Since we know that (after the xform is
- // done) that the block *is* infinite if reached, we just make it an obviously
- // infinite loop with no cond branch.
- if (OtherDest == BB) {
- // Insert it at the end of the function, because it's either code,
- // or it won't matter if it's hot. :)
- BasicBlock *InfLoopBlock =
- BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
- BranchInst::Create(InfLoopBlock, InfLoopBlock);
- OtherDest = InfLoopBlock;
- }
-
- LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
-
- // BI may have other predecessors. Because of this, we leave
- // it alone, but modify PBI.
-
- // Make sure we get to CommonDest on True&True directions.
- Value *PBICond = PBI->getCondition();
- IRBuilder<NoFolder> Builder(PBI);
- if (PBIOp)
- PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
-
- Value *BICond = BI->getCondition();
- if (BIOp)
- BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
-
- // Merge the conditions.
- Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
-
- // Modify PBI to branch on the new condition to the new dests.
- PBI->setCondition(Cond);
- PBI->setSuccessor(0, CommonDest);
- PBI->setSuccessor(1, OtherDest);
-
- // Update branch weight for PBI.
- uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
- bool HasWeights =
- extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
- SuccTrueWeight, SuccFalseWeight);
- if (HasWeights) {
- PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
- PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
- SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
- SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
- // The weight to CommonDest should be PredCommon * SuccTotal +
- // PredOther * SuccCommon.
- // The weight to OtherDest should be PredOther * SuccOther.
- uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
- PredOther * SuccCommon,
- PredOther * SuccOther};
- // Halve the weights if any of them cannot fit in an uint32_t
- FitWeights(NewWeights);
-
- setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
- }
-
- // OtherDest may have phi nodes. If so, add an entry from PBI's
- // block that are identical to the entries for BI's block.
- AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
-
- // We know that the CommonDest already had an edge from PBI to
- // it. If it has PHIs though, the PHIs may have different
- // entries for BB and PBI's BB. If so, insert a select to make
- // them agree.
- for (PHINode &PN : CommonDest->phis()) {
- Value *BIV = PN.getIncomingValueForBlock(BB);
- unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
- Value *PBIV = PN.getIncomingValue(PBBIdx);
- if (BIV != PBIV) {
- // Insert a select in PBI to pick the right value.
- SelectInst *NV = cast<SelectInst>(
- Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
- PN.setIncomingValue(PBBIdx, NV);
- // Although the select has the same condition as PBI, the original branch
- // weights for PBI do not apply to the new select because the select's
- // 'logical' edges are incoming edges of the phi that is eliminated, not
- // the outgoing edges of PBI.
- if (HasWeights) {
- uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
- uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
- uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
- uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
- // The weight to PredCommonDest should be PredCommon * SuccTotal.
- // The weight to PredOtherDest should be PredOther * SuccCommon.
- uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
- PredOther * SuccCommon};
-
- FitWeights(NewWeights);
-
- setBranchWeights(NV, NewWeights[0], NewWeights[1]);
- }
- }
- }
-
- LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
- LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
-
- // This basic block is probably dead. We know it has at least
- // one fewer predecessor.
- return true;
-}
-
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                       BasicBlock *TrueBB, BasicBlock *FalseBB,
                                       uint32_t TrueWeight,
                                       uint32_t FalseWeight) {
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else
      // This successor is not selected by Cond; drop the edge but keep
      // one-input PHIs so the incoming value is not lost.
      Succ->removePredecessor(OldTerm->getParent(),
                              /*KeepOneInputPHIs=*/true);
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  // After the loop above, a non-null KeepEdge means that block was NOT
  // already a successor of OldTerm.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB)
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      // Equal weights carry no information; only attach metadata when they
      // actually differ.
      if (TrueWeight != FalseWeight)
        setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm);
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1)
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    else
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
  }

  // Drop the old terminator and any now-dead instructions feeding its
  // condition.
  EraseTerminatorAndDCECond(OldTerm);
  return true;
}
-
-// Replaces
-// (switch (select cond, X, Y)) on constant X, Y
-// with a branch - conditional if X and Y lead to distinct BBs,
-// unconditional otherwise.
-static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
- // Check for constant integer values in the select.
- ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
- ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
- if (!TrueVal || !FalseVal)
- return false;
-
- // Find the relevant condition and destinations.
- Value *Condition = Select->getCondition();
- BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
- BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
-
- // Get weight for TrueBB and FalseBB.
- uint32_t TrueWeight = 0, FalseWeight = 0;
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
- GetBranchWeights(SI, Weights);
- if (Weights.size() == 1 + SI->getNumCases()) {
- TrueWeight =
- (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
- FalseWeight =
- (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
- }
- }
-
- // Perform the actual simplification.
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
- FalseWeight);
-}
-
-// Replaces
-// (indirectbr (select cond, blockaddress(@fn, BlockA),
-// blockaddress(@fn, BlockB)))
-// with
-// (br cond, BlockA, BlockB).
-static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
- // Check that both operands of the select are block addresses.
- BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
- BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
- if (!TBA || !FBA)
- return false;
-
- // Extract the actual blocks.
- BasicBlock *TrueBB = TBA->getBasicBlock();
- BasicBlock *FalseBB = FBA->getBasicBlock();
-
- // Perform the actual simplification.
- return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
- 0);
-}
-
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(0);
  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch.  In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block.  Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest.  If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(Cst) != SI->case_default()) {
    Value *V;
    // On the default edge V cannot equal Cst (that case would have been
    // taken), so eq folds to false and ne folds to true.
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(BB->getContext());
    else
      V = ConstantInt::getTrue(BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the icmp has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
  // true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(DefaultCst, NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(DefaultCst);
  ICI->eraseFromParent();

  // Okay, the switch goes to this block on a default value.  Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    // Keep the profile metadata consistent: split the old default weight
    // evenly between the default edge and the newly added case.
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(Cst, NewBB, NewW);
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  return true;
}
-
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                      const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result: the constants compared against, the common compared
  // value, how many icmps were consumed, and at most one leftover
  // non-constant condition ("Extra").
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation.  A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH.  BB is:\n"
                    << *BB);

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first.  Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB =
        BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // ExtraCase jumps straight to EdgeBB on the "equal" outcome, otherwise
    // falls through to the block that will hold the switch.
    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    AddPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    // From here on, the switch is built in the split-off block.
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (unsigned i = 0, e = Values.size(); i != e; ++i)
    New->addCase(Values[i], EdgeBB);

  // We added edges from PI to the EdgeBB.  As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added.  One edge from BB already existed, hence
  // Values.size() - 1 new entries per PHI.
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  EraseTerminatorAndDCECond(BI);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
-
-bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
- if (isa<PHINode>(RI->getValue()))
- return SimplifyCommonResume(RI);
- else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
- RI->getValue() == RI->getParent()->getFirstNonPHI())
- // The resume must unwind the exception that caused control to branch here.
- return SimplifySingleResume(RI);
-
- return false;
-}
-
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug intrinsics
  // between the phi of landing pads (RI->getValue()) and resume instruction.
  BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
                       E = RI->getIterator();
  while (++I != E)
    if (!isa<DbgInfoIntrinsic>(I))
      return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.  A "trivial"
  // incoming block is one whose only non-debug contents are its landingpad
  // and its branch to BB.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    bool isTrivial = true;

    I = IncomingBB->getFirstNonPHI()->getIterator();
    E = IncomingBB->getTerminator()->getIterator();
    while (++I != E)
      if (!isa<DbgInfoIntrinsic>(I)) {
        isTrivial = false;
        break;
      }

    if (isTrivial)
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    // removeUnwindEdge mutates the predecessor list, so advance the
    // iterator before the call.
    for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
         PI != PE;) {
      BasicBlock *Pred = *PI++;
      removeUnwindEdge(Pred);
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass.  So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    BB->eraseFromParent();

  // TrivialUnwindBlocks is non-empty here (guarded above), so this always
  // reports a change.
  return !TrivialUnwindBlocks.empty();
}
-
-// Simplify resume that is only used by a single (non-phi) landing pad.
-bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
- BasicBlock *BB = RI->getParent();
- LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
- assert(RI->getValue() == LPInst &&
- "Resume must unwind the exception that caused control to here");
-
- // Check that there are no other instructions except for debug intrinsics.
- BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
- while (++I != E)
- if (!isa<DbgInfoIntrinsic>(I))
- return false;
-
- // Turn all invokes that unwind here into calls and delete the basic block.
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
- BasicBlock *Pred = *PI++;
- removeUnwindEdge(Pred);
- }
-
- // The landingpad is now unreachable. Zap it.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
- return true;
-}
-
static bool removeEmptyCleanup(CleanupReturnInst *RI) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated.  If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction.  If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses.  This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator();
  while (++I != E) {
    auto *II = dyn_cast<IntrinsicInst>(I);
    if (!II)
      return false;

    // Only debug info and lifetime-end markers are tolerated; anything else
    // means the cleanup does real work.
    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break;
    default:
      return false;
    }
  }

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow.  Before we do, sink any
  // PHINodes into the unwind destination.  Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (BasicBlock::iterator I = UnwindDest->begin(),
                              IE = DestEHPad->getIterator();
         I != IE; ++I) {
      PHINode *DestPN = cast<PHINode>(I);

      int Idx = DestPN->getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing.  If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty).  Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN->getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // Remove the entry for the block we are deleting.
      DestPN->removeIncomingValue(Idx, false);

      if (SrcPN && SrcPN->getParent() == BB) {
        // If the incoming value was a PHI node in the cleanup pad we are
        // removing, we need to merge that PHI node's incoming values into
        // DestPN.
        for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
             SrcIdx != SrcE; ++SrcIdx) {
          DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
                              SrcPN->getIncomingBlock(SrcIdx));
        }
      } else {
        // Otherwise, the incoming value came from above BB and
        // so we can just reuse it.  We must associate all of BB's
        // predecessors with this value.
        for (auto *pred : predecessors(BB)) {
          DestPN->addIncoming(SrcVal, pred);
        }
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (BasicBlock::iterator I = BB->begin(),
                              IE = BB->getFirstNonPHI()->getIterator();
         I != IE;) {
      // The iterator must be incremented here because the instructions are
      // being moved to another block.
      PHINode *PN = cast<PHINode>(I++);
      if (PN->use_empty())
        // If the PHI node has no uses, just leave it.  It will be erased
        // when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB.  In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN->addIncoming(PN, pred);
      PN->moveBefore(InsertPt);
    }
  }

  // Re-point every predecessor around the pad: either drop its unwind edge
  // entirely (unwind-to-caller case) or retarget it at UnwindDest.
  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
    // The iterator must be updated here because we are removing this pred.
    BasicBlock *PredBB = *PI++;
    if (UnwindDest == nullptr) {
      removeUnwindEdge(PredBB);
    } else {
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
    }
  }

  // The cleanup pad is now unreachable.  Zap it.
  BB->eraseFromParent();
  return true;
}
-
-// Try to merge two cleanuppads together.
-static bool mergeCleanupPad(CleanupReturnInst *RI) {
- // Skip any cleanuprets which unwind to caller, there is nothing to merge
- // with.
- BasicBlock *UnwindDest = RI->getUnwindDest();
- if (!UnwindDest)
- return false;
-
- // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
- // be safe to merge without code duplication.
- if (UnwindDest->getSinglePredecessor() != RI->getParent())
- return false;
-
- // Verify that our cleanuppad's unwind destination is another cleanuppad.
- auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
- if (!SuccessorCleanupPad)
- return false;
-
- CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
- // Replace any uses of the successor cleanupad with the predecessor pad
- // The only cleanuppad uses should be this cleanupret, it's cleanupret and
- // funclet bundle operands.
- SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
- // Remove the old cleanuppad.
- SuccessorCleanupPad->eraseFromParent();
- // Now, we simply replace the cleanupret with a branch to the unwind
- // destination.
- BranchInst::Create(UnwindDest, RI->getParent());
- RI->eraseFromParent();
-
- return true;
-}
-
bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
  // It is possible to transiently have an undef cleanuppad operand because we
  // have deleted some, but not all, dead blocks.
  // Eventually, this block will be deleted.
  if (isa<UndefValue>(RI->getOperand(0)))
    return false;

  // Prefer merging this pad into its successor cleanuppad; if that does not
  // apply, try to delete the pad entirely when it does no work.
  if (mergeCleanupPad(RI))
    return true;

  if (removeEmptyCleanup(RI))
    return true;

  return false;
}
-
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
  BasicBlock *BB = RI->getParent();
  // Only handle return blocks that are empty apart from the return itself
  // (ignoring PHIs and debug intrinsics).
  if (!BB->getFirstNonPHIOrDbg()->isTerminator())
    return false;

  // Find predecessors that end with branches.
  SmallVector<BasicBlock *, 8> UncondBranchPreds;
  SmallVector<BranchInst *, 8> CondBranchPreds;
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    BasicBlock *P = *PI;
    Instruction *PTI = P->getTerminator();
    if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
      if (BI->isUnconditional())
        UncondBranchPreds.push_back(P);
      else
        CondBranchPreds.push_back(BI);
    }
  }

  // If we found some, do the transformation!  DupRet gates duplicating the
  // return into each unconditional predecessor.
  if (!UncondBranchPreds.empty() && DupRet) {
    while (!UncondBranchPreds.empty()) {
      BasicBlock *Pred = UncondBranchPreds.pop_back_val();
      LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
                        << "INTO UNCOND BRANCH PRED: " << *Pred);
      (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
    }

    // If we eliminated all predecessors of the block, delete the block now.
    if (pred_empty(BB)) {
      // We know there are no successors, so just nuke the block.
      if (LoopHeaders)
        LoopHeaders->erase(BB);
      BB->eraseFromParent();
    }

    return true;
  }

  // Check out all of the conditional branches going to this return
  // instruction.  If any of them just select between returns, change the
  // branch itself into a select/return pair.
  while (!CondBranchPreds.empty()) {
    BranchInst *BI = CondBranchPreds.pop_back_val();

    // Check to see if the non-BB successor is also a return block.
    if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
        isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
        SimplifyCondBranchToTwoReturns(BI, Builder))
      return true;
  }
  return false;
}
-
-bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
- BasicBlock *BB = UI->getParent();
-
- bool Changed = false;
-
- // If there are any instructions immediately before the unreachable that can
- // be removed, do so.
- while (UI->getIterator() != BB->begin()) {
- BasicBlock::iterator BBI = UI->getIterator();
- --BBI;
- // Do not delete instructions that can have side effects which might cause
- // the unreachable to not be reachable; specifically, calls and volatile
- // operations may have this effect.
- if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))
- break;
-
- if (BBI->mayHaveSideEffects()) {
- if (auto *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->isVolatile())
- break;
- } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->isVolatile())
- break;
- } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
- if (RMWI->isVolatile())
- break;
- } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
- if (CXI->isVolatile())
- break;
- } else if (isa<CatchPadInst>(BBI)) {
- // A catchpad may invoke exception object constructors and such, which
- // in some languages can be arbitrary code, so be conservative by
- // default.
- // For CoreCLR, it just involves a type test, so can be removed.
- if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
- EHPersonality::CoreCLR)
- break;
- } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
- !isa<LandingPadInst>(BBI)) {
- break;
- }
- // Note that deleting LandingPad's here is in fact okay, although it
- // involves a bit of subtle reasoning. If this inst is a LandingPad,
- // all the predecessors of this block will be the unwind edges of Invokes,
- // and we can therefore guarantee this block will be erased.
- }
-
- // Delete this instruction (any uses are guaranteed to be dead)
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BBI->eraseFromParent();
- Changed = true;
- }
-
- // If the unreachable instruction is the first in the block, take a gander
- // at all of the predecessors of this instruction, and simplify them.
- if (&BB->front() != UI)
- return Changed;
-
- SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- Instruction *TI = Preds[i]->getTerminator();
- IRBuilder<> Builder(TI);
- if (auto *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isUnconditional()) {
- if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
- } else {
- Value* Cond = BI->getCondition();
- if (BI->getSuccessor(0) == BB) {
- Builder.CreateAssumption(Builder.CreateNot(Cond));
- Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorAndDCECond(BI);
- } else if (BI->getSuccessor(1) == BB) {
- Builder.CreateAssumption(Cond);
- Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorAndDCECond(BI);
- Changed = true;
- }
- }
- } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- SwitchInstProfUpdateWrapper SU(*SI);
- for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
- if (i->getCaseSuccessor() != BB) {
- ++i;
- continue;
- }
- BB->removePredecessor(SU->getParent());
- i = SU.removeCase(i);
- e = SU->case_end();
- Changed = true;
- }
- } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
- if (II->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
- Changed = true;
- }
- } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
- if (CSI->getUnwindDest() == BB) {
- removeUnwindEdge(TI->getParent());
- Changed = true;
- continue;
- }
-
- for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
- E = CSI->handler_end();
- I != E; ++I) {
- if (*I == BB) {
- CSI->removeHandler(I);
- --I;
- --E;
- Changed = true;
- }
- }
- if (CSI->getNumHandlers() == 0) {
- BasicBlock *CatchSwitchBB = CSI->getParent();
- if (CSI->hasUnwindDest()) {
- // Redirect preds to the unwind dest
- CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
- } else {
- // Rewrite all preds to unwind to caller (or from invoke to call).
- SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
- for (BasicBlock *EHPred : EHPreds)
- removeUnwindEdge(EHPred);
- }
- // The catchswitch is no longer reachable.
- new UnreachableInst(CSI->getContext(), CSI);
- CSI->eraseFromParent();
- Changed = true;
- }
- } else if (isa<CleanupReturnInst>(TI)) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
- }
-
- // If this block is now dead, remove it.
- if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- if (LoopHeaders)
- LoopHeaders->erase(BB);
- BB->eraseFromParent();
- return true;
- }
-
- return Changed;
-}
-
-static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
- assert(Cases.size() >= 1);
-
- array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
- for (size_t I = 1, E = Cases.size(); I != E; ++I) {
- if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
- return false;
- }
- return true;
-}
-
-/// Turn a switch with two reachable destinations into an integer range
-/// comparison and branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 1 && "Degenerate switch?");
-
- bool HasDefault =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-
- // Partition the cases into two sets with different destinations.
- BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
- BasicBlock *DestB = nullptr;
- SmallVector<ConstantInt *, 16> CasesA;
- SmallVector<ConstantInt *, 16> CasesB;
-
- for (auto Case : SI->cases()) {
- BasicBlock *Dest = Case.getCaseSuccessor();
- if (!DestA)
- DestA = Dest;
- if (Dest == DestA) {
- CasesA.push_back(Case.getCaseValue());
- continue;
- }
- if (!DestB)
- DestB = Dest;
- if (Dest == DestB) {
- CasesB.push_back(Case.getCaseValue());
- continue;
- }
- return false; // More than two destinations.
- }
-
- assert(DestA && DestB &&
- "Single-destination switch should have been folded.");
- assert(DestA != DestB);
- assert(DestB != SI->getDefaultDest());
- assert(!CasesB.empty() && "There must be non-default cases.");
- assert(!CasesA.empty() || HasDefault);
-
- // Figure out if one of the sets of cases form a contiguous range.
- SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
- BasicBlock *ContiguousDest = nullptr;
- BasicBlock *OtherDest = nullptr;
- if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
- ContiguousCases = &CasesA;
- ContiguousDest = DestA;
- OtherDest = DestB;
- } else if (CasesAreContiguous(CasesB)) {
- ContiguousCases = &CasesB;
- ContiguousDest = DestB;
- OtherDest = DestA;
- } else
- return false;
-
- // Start building the compare and branch.
-
- Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
- Constant *NumCases =
- ConstantInt::get(Offset->getType(), ContiguousCases->size());
-
- Value *Sub = SI->getCondition();
- if (!Offset->isNullValue())
- Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
-
- Value *Cmp;
- // If NumCases overflowed, then all possible values jump to the successor.
- if (NumCases->isNullValue() && !ContiguousCases->empty())
- Cmp = ConstantInt::getTrue(SI->getContext());
- else
- Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
-
- // Update weight for the newly-created conditional branch.
- if (HasBranchWeights(SI)) {
- SmallVector<uint64_t, 8> Weights;
- GetBranchWeights(SI, Weights);
- if (Weights.size() == 1 + SI->getNumCases()) {
- uint64_t TrueWeight = 0;
- uint64_t FalseWeight = 0;
- for (size_t I = 0, E = Weights.size(); I != E; ++I) {
- if (SI->getSuccessor(I) == ContiguousDest)
- TrueWeight += Weights[I];
- else
- FalseWeight += Weights[I];
- }
- while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
- TrueWeight /= 2;
- FalseWeight /= 2;
- }
- setBranchWeights(NewBI, TrueWeight, FalseWeight);
- }
- }
-
- // Prune obsolete incoming values off the successors' PHI nodes.
- for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
- unsigned PreviousEdges = ContiguousCases->size();
- if (ContiguousDest == SI->getDefaultDest())
- ++PreviousEdges;
- for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
- }
- for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
- unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
- if (OtherDest == SI->getDefaultDest())
- ++PreviousEdges;
- for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
- }
-
- // Drop the switch.
- SI->eraseFromParent();
-
- return true;
-}
-
-/// Compute masked bits for the condition of a switch
-/// and use it to remove dead cases.
-static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
- const DataLayout &DL) {
- Value *Cond = SI->getCondition();
- unsigned Bits = Cond->getType()->getIntegerBitWidth();
- KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
-
- // We can also eliminate cases by determining that their values are outside of
- // the limited range of the condition based on how many significant (non-sign)
- // bits are in the condition value.
- unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
- unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
-
- // Gather dead cases.
- SmallVector<ConstantInt *, 8> DeadCases;
- for (auto &Case : SI->cases()) {
- const APInt &CaseVal = Case.getCaseValue()->getValue();
- if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
- (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
- DeadCases.push_back(Case.getCaseValue());
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
- << " is dead.\n");
- }
- }
-
- // If we can prove that the cases must cover all possible values, the
- // default destination becomes dead and we can remove it. If we know some
- // of the bits in the value, we can use that to more precisely compute the
- // number of possible unique case values.
- bool HasDefault =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const unsigned NumUnknownBits =
- Bits - (Known.Zero | Known.One).countPopulation();
- assert(NumUnknownBits <= Bits);
- if (HasDefault && DeadCases.empty() &&
- NumUnknownBits < 64 /* avoid overflow */ &&
- SI->getNumCases() == (1ULL << NumUnknownBits)) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- BasicBlock *NewDefault =
- SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");
- SI->setDefaultDest(&*NewDefault);
- SplitBlock(&*NewDefault, &NewDefault->front());
- auto *OldTI = NewDefault->getTerminator();
- new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorAndDCECond(OldTI);
- return true;
- }
-
- if (DeadCases.empty())
- return false;
-
- SwitchInstProfUpdateWrapper SIW(*SI);
- for (ConstantInt *DeadCase : DeadCases) {
- SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
- assert(CaseI != SI->case_default() &&
- "Case was not found. Probably mistake in DeadCases forming.");
- // Prune unused values from PHI nodes.
- CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
- SIW.removeCase(CaseI);
- }
-
- return true;
-}
-
-/// If BB would be eligible for simplification by
-/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
-/// by an unconditional branch), look at the phi node for BB in the successor
-/// block and see if the incoming value is equal to CaseValue. If so, return
-/// the phi node, and set PhiIndex to BB's index in the phi node.
-static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
- BasicBlock *BB, int *PhiIndex) {
- if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
- return nullptr; // BB must be empty to be a candidate for simplification.
- if (!BB->getSinglePredecessor())
- return nullptr; // BB must be dominated by the switch.
-
- BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
- if (!Branch || !Branch->isUnconditional())
- return nullptr; // Terminator must be unconditional branch.
-
- BasicBlock *Succ = Branch->getSuccessor(0);
-
- for (PHINode &PHI : Succ->phis()) {
- int Idx = PHI.getBasicBlockIndex(BB);
- assert(Idx >= 0 && "PHI has no entry for predecessor?");
-
- Value *InValue = PHI.getIncomingValue(Idx);
- if (InValue != CaseValue)
- continue;
-
- *PhiIndex = Idx;
- return &PHI;
- }
-
- return nullptr;
-}
-
-/// Try to forward the condition of a switch instruction to a phi node
-/// dominated by the switch, if that would mean that some of the destination
-/// blocks of the switch can be folded away. Return true if a change is made.
-static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
- using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
-
- ForwardingNodesMap ForwardingNodes;
- BasicBlock *SwitchBlock = SI->getParent();
- bool Changed = false;
- for (auto &Case : SI->cases()) {
- ConstantInt *CaseValue = Case.getCaseValue();
- BasicBlock *CaseDest = Case.getCaseSuccessor();
-
- // Replace phi operands in successor blocks that are using the constant case
- // value rather than the switch condition variable:
- // switchbb:
- // switch i32 %x, label %default [
- // i32 17, label %succ
- // ...
- // succ:
- // %r = phi i32 ... [ 17, %switchbb ] ...
- // -->
- // %r = phi i32 ... [ %x, %switchbb ] ...
-
- for (PHINode &Phi : CaseDest->phis()) {
- // This only works if there is exactly 1 incoming edge from the switch to
- // a phi. If there is >1, that means multiple cases of the switch map to 1
- // value in the phi, and that phi value is not the switch condition. Thus,
- // this transform would not make sense (the phi would be invalid because
- // a phi can't have different incoming values from the same block).
- int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
- if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
- count(Phi.blocks(), SwitchBlock) == 1) {
- Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
- Changed = true;
- }
- }
-
- // Collect phi nodes that are indirectly using this switch's case constants.
- int PhiIdx;
- if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
- ForwardingNodes[Phi].push_back(PhiIdx);
- }
-
- for (auto &ForwardingNode : ForwardingNodes) {
- PHINode *Phi = ForwardingNode.first;
- SmallVectorImpl<int> &Indexes = ForwardingNode.second;
- if (Indexes.size() < 2)
- continue;
-
- for (int Index : Indexes)
- Phi->setIncomingValue(Index, SI->getCondition());
- Changed = true;
- }
-
- return Changed;
-}
-
-/// Return true if the backend will be able to handle
-/// initializing an array of constants like C.
-static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
- if (C->isThreadDependent())
- return false;
- if (C->isDLLImportDependent())
- return false;
-
- if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
- !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
- !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
- return false;
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
- if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
- return false;
- }
-
- if (!TTI.shouldBuildLookupTablesForConstant(C))
- return false;
-
- return true;
-}
-
-/// If V is a Constant, return it. Otherwise, try to look up
-/// its constant value in ConstantPool, returning 0 if it's not there.
-static Constant *
-LookupConstant(Value *V,
- const SmallDenseMap<Value *, Constant *> &ConstantPool) {
- if (Constant *C = dyn_cast<Constant>(V))
- return C;
- return ConstantPool.lookup(V);
-}
-
-/// Try to fold instruction I into a constant. This works for
-/// simple instructions such as binary operations where both operands are
-/// constant or can be replaced by constants from the ConstantPool. Returns the
-/// resulting constant on success, 0 otherwise.
-static Constant *
-ConstantFold(Instruction *I, const DataLayout &DL,
- const SmallDenseMap<Value *, Constant *> &ConstantPool) {
- if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
- Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
- if (!A)
- return nullptr;
- if (A->isAllOnesValue())
- return LookupConstant(Select->getTrueValue(), ConstantPool);
- if (A->isNullValue())
- return LookupConstant(Select->getFalseValue(), ConstantPool);
- return nullptr;
- }
-
- SmallVector<Constant *, 4> COps;
- for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
- if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
- COps.push_back(A);
- else
- return nullptr;
- }
-
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
- COps[1], DL);
- }
-
- return ConstantFoldInstOperands(I, COps, DL);
-}
-
-/// Try to determine the resulting constant values in phi nodes
-/// at the common destination basic block, *CommonDest, for one of the case
-/// destionations CaseDest corresponding to value CaseVal (0 for the default
-/// case), of a switch instruction SI.
-static bool
-GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
- BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
- const DataLayout &DL, const TargetTransformInfo &TTI) {
- // The block from which we enter the common destination.
- BasicBlock *Pred = SI->getParent();
-
- // If CaseDest is empty except for some side-effect free instructions through
- // which we can constant-propagate the CaseVal, continue to its successor.
- SmallDenseMap<Value *, Constant *> ConstantPool;
- ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
- for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
- if (I.isTerminator()) {
- // If the terminator is a simple branch, continue to the next block.
- if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
- return false;
- Pred = CaseDest;
- CaseDest = I.getSuccessor(0);
- } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
- // Instruction is side-effect free and constant.
-
- // If the instruction has uses outside this block or a phi node slot for
- // the block, it is not safe to bypass the instruction since it would then
- // no longer dominate all its uses.
- for (auto &Use : I.uses()) {
- User *User = Use.getUser();
- if (Instruction *I = dyn_cast<Instruction>(User))
- if (I->getParent() == CaseDest)
- continue;
- if (PHINode *Phi = dyn_cast<PHINode>(User))
- if (Phi->getIncomingBlock(Use) == CaseDest)
- continue;
- return false;
- }
-
- ConstantPool.insert(std::make_pair(&I, C));
- } else {
- break;
- }
- }
-
- // If we did not have a CommonDest before, use the current one.
- if (!*CommonDest)
- *CommonDest = CaseDest;
- // If the destination isn't the common one, abort.
- if (CaseDest != *CommonDest)
- return false;
-
- // Get the values for this case from phi nodes in the destination block.
- for (PHINode &PHI : (*CommonDest)->phis()) {
- int Idx = PHI.getBasicBlockIndex(Pred);
- if (Idx == -1)
- continue;
-
- Constant *ConstVal =
- LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
- if (!ConstVal)
- return false;
-
- // Be conservative about which kinds of constants we support.
- if (!ValidLookupTableConstant(ConstVal, TTI))
- return false;
-
- Res.push_back(std::make_pair(&PHI, ConstVal));
- }
-
- return Res.size() > 0;
-}
-
-// Helper function used to add CaseVal to the list of cases that generate
-// Result. Returns the updated number of cases that generate this result.
-static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *Result) {
- for (auto &I : UniqueResults) {
- if (I.first == Result) {
- I.second.push_back(CaseVal);
- return I.second.size();
- }
- }
- UniqueResults.push_back(
- std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
- return 1;
-}
-
-// Helper function that initializes a map containing
-// results for the PHI node of the common destination block for a switch
-// instruction. Returns false if multiple PHI nodes have been found or if
-// there is not a common destination block for the switch.
-static bool
-InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult, const DataLayout &DL,
- const TargetTransformInfo &TTI,
- uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
- for (auto &I : SI->cases()) {
- ConstantInt *CaseVal = I.getCaseValue();
-
- // Resulting value at phi nodes for this case value.
- SwitchCaseResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
- DL, TTI))
- return false;
-
- // Only one value per case is permitted.
- if (Results.size() > 1)
- return false;
-
- // Add the case->result mapping to UniqueResults.
- const uintptr_t NumCasesForResult =
- MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
-
- // Early out if there are too many cases for this result.
- if (NumCasesForResult > MaxCasesPerResult)
- return false;
-
- // Early out if there are too many unique results.
- if (UniqueResults.size() > MaxUniqueResults)
- return false;
-
- // Check the PHI consistency.
- if (!PHI)
- PHI = Results[0].first;
- else if (PHI != Results[0].first)
- return false;
- }
- // Find the default result value.
- SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
- BasicBlock *DefaultDest = SI->getDefaultDest();
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
- DL, TTI);
- // If the default value is not found abort unless the default destination
- // is unreachable.
- DefaultResult =
- DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
- if ((!DefaultResult &&
- !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
- return false;
-
- return true;
-}
-
-// Helper function that checks if it is possible to transform a switch with only
-// two cases (or two cases + default) that produces a result into a select.
-// Example:
-// switch (a) {
-// case 10: %0 = icmp eq i32 %a, 10
-// return 10; %1 = select i1 %0, i32 10, i32 4
-// case 20: ----> %2 = icmp eq i32 %a, 20
-// return 2; %3 = select i1 %2, i32 2, i32 %1
-// default:
-// return 4;
-// }
-static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
- Constant *DefaultResult, Value *Condition,
- IRBuilder<> &Builder) {
- assert(ResultVector.size() == 2 &&
- "We should have exactly two unique results at this point");
- // If we are selecting between only two cases transform into a simple
- // select or a two-way select if default is possible.
- if (ResultVector[0].second.size() == 1 &&
- ResultVector[1].second.size() == 1) {
- ConstantInt *const FirstCase = ResultVector[0].second[0];
- ConstantInt *const SecondCase = ResultVector[1].second[0];
-
- bool DefaultCanTrigger = DefaultResult;
- Value *SelectValue = ResultVector[1].first;
- if (DefaultCanTrigger) {
- Value *const ValueCompare =
- Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
- SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
- DefaultResult, "switch.select");
- }
- Value *const ValueCompare =
- Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
- return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
- SelectValue, "switch.select");
- }
-
- return nullptr;
-}
-
-// Helper function to cleanup a switch instruction that has been converted into
-// a select, fixing up PHI nodes and basic blocks.
-static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
- Value *SelectValue,
- IRBuilder<> &Builder) {
- BasicBlock *SelectBB = SI->getParent();
- while (PHI->getBasicBlockIndex(SelectBB) >= 0)
- PHI->removeIncomingValue(SelectBB);
- PHI->addIncoming(SelectValue, SelectBB);
-
- Builder.CreateBr(PHI->getParent());
-
- // Remove the switch.
- for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
- BasicBlock *Succ = SI->getSuccessor(i);
-
- if (Succ == PHI->getParent())
- continue;
- Succ->removePredecessor(SelectBB);
- }
- SI->eraseFromParent();
-}
-
-/// If the switch is only used to initialize one or more
-/// phi nodes in a common successor block with only two different
-/// constant values, replace the switch with select.
-static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
- Value *const Cond = SI->getCondition();
- PHINode *PHI = nullptr;
- BasicBlock *CommonDest = nullptr;
- Constant *DefaultResult;
- SwitchCaseResultVectorTy UniqueResults;
- // Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL, TTI, 2, 1))
- return false;
- // Selects choose between maximum two values.
- if (UniqueResults.size() != 2)
- return false;
- assert(PHI != nullptr && "PHI for value select not found");
-
- Builder.SetInsertPoint(SI);
- Value *SelectValue =
- ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
- if (SelectValue) {
- RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
- return true;
- }
- // The switch couldn't be converted into a select.
- return false;
-}
-
-namespace {
-
-/// This class represents a lookup table that can be used to replace a switch.
-class SwitchLookupTable {
-public:
- /// Create a lookup table to use as a switch replacement with the contents
- /// of Values, using DefaultValue to fill any holes in the table.
- SwitchLookupTable(
- Module &M, uint64_t TableSize, ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
-
- /// Build instructions with Builder to retrieve the value at
- /// the position given by Index in the lookup table.
- Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
-
- /// Return true if a table with TableSize elements of
- /// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
- Type *ElementType);
-
-private:
- // Depending on the contents of the table, it can be represented in
- // different ways.
- enum {
- // For tables where each element contains the same value, we just have to
- // store that single value and return it for each lookup.
- SingleValueKind,
-
- // For tables where there is a linear relationship between table index
- // and values. We calculate the result with a simple multiplication
- // and addition instead of a table lookup.
- LinearMapKind,
-
- // For small tables with integer elements, we can pack them into a bitmap
- // that fits into a target-legal register. Values are retrieved by
- // shift and mask operations.
- BitMapKind,
-
- // The table is stored as an array of values. Values are retrieved by load
- // instructions from the table.
- ArrayKind
- } Kind;
-
- // For SingleValueKind, this is the single value.
- Constant *SingleValue = nullptr;
-
- // For BitMapKind, this is the bitmap.
- ConstantInt *BitMap = nullptr;
- IntegerType *BitMapElementTy = nullptr;
-
- // For LinearMapKind, these are the constants used to derive the value.
- ConstantInt *LinearOffset = nullptr;
- ConstantInt *LinearMultiplier = nullptr;
-
- // For ArrayKind, this is the array.
- GlobalVariable *Array = nullptr;
-};
-
-} // end anonymous namespace
-
-SwitchLookupTable::SwitchLookupTable(
- Module &M, uint64_t TableSize, ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
- Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
- assert(Values.size() && "Can't build lookup table without values!");
- assert(TableSize >= Values.size() && "Can't fit values in table!");
-
- // If all values in the table are equal, this is that value.
- SingleValue = Values.begin()->second;
-
- Type *ValueType = Values.begin()->second->getType();
-
- // Build up the table contents.
- SmallVector<Constant *, 64> TableContents(TableSize);
- for (size_t I = 0, E = Values.size(); I != E; ++I) {
- ConstantInt *CaseVal = Values[I].first;
- Constant *CaseRes = Values[I].second;
- assert(CaseRes->getType() == ValueType);
-
- uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
- TableContents[Idx] = CaseRes;
-
- if (CaseRes != SingleValue)
- SingleValue = nullptr;
- }
-
- // Fill in any holes in the table with the default result.
- if (Values.size() < TableSize) {
- assert(DefaultValue &&
- "Need a default value to fill the lookup table holes.");
- assert(DefaultValue->getType() == ValueType);
- for (uint64_t I = 0; I < TableSize; ++I) {
- if (!TableContents[I])
- TableContents[I] = DefaultValue;
- }
-
- if (DefaultValue != SingleValue)
- SingleValue = nullptr;
- }
-
- // If each element in the table contains the same value, we only need to store
- // that single value.
- if (SingleValue) {
- Kind = SingleValueKind;
- return;
- }
-
- // Check if we can derive the value with a linear transformation from the
- // table index.
- if (isa<IntegerType>(ValueType)) {
- bool LinearMappingPossible = true;
- APInt PrevVal;
- APInt DistToPrev;
- assert(TableSize >= 2 && "Should be a SingleValue table.");
- // Check if there is the same distance between two consecutive values.
- for (uint64_t I = 0; I < TableSize; ++I) {
- ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
- if (!ConstVal) {
- // This is an undef. We could deal with it, but undefs in lookup tables
- // are very seldom. It's probably not worth the additional complexity.
- LinearMappingPossible = false;
- break;
- }
- const APInt &Val = ConstVal->getValue();
- if (I != 0) {
- APInt Dist = Val - PrevVal;
- if (I == 1) {
- DistToPrev = Dist;
- } else if (Dist != DistToPrev) {
- LinearMappingPossible = false;
- break;
- }
- }
- PrevVal = Val;
- }
- if (LinearMappingPossible) {
- LinearOffset = cast<ConstantInt>(TableContents[0]);
- LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
- Kind = LinearMapKind;
- ++NumLinearMaps;
- return;
- }
- }
-
- // If the type is integer and the table fits in a register, build a bitmap.
- if (WouldFitInRegister(DL, TableSize, ValueType)) {
- IntegerType *IT = cast<IntegerType>(ValueType);
- APInt TableInt(TableSize * IT->getBitWidth(), 0);
- for (uint64_t I = TableSize; I > 0; --I) {
- TableInt <<= IT->getBitWidth();
- // Insert values into the bitmap. Undef values are set to zero.
- if (!isa<UndefValue>(TableContents[I - 1])) {
- ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
- TableInt |= Val->getValue().zext(TableInt.getBitWidth());
- }
- }
- BitMap = ConstantInt::get(M.getContext(), TableInt);
- BitMapElementTy = IT;
- Kind = BitMapKind;
- ++NumBitMaps;
- return;
- }
-
- // Store the table in an array.
- ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
- Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
-
- Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
- GlobalVariable::PrivateLinkage, Initializer,
- "switch.table." + FuncName);
- Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- // Set the alignment to that of an array items. We will be only loading one
- // value out of it.
- Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
- Kind = ArrayKind;
-}
-
-Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
- switch (Kind) {
- case SingleValueKind:
- return SingleValue;
- case LinearMapKind: {
- // Derive the result value from the input value.
- Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
- false, "switch.idx.cast");
- if (!LinearMultiplier->isOne())
- Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
- if (!LinearOffset->isZero())
- Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
- return Result;
- }
- case BitMapKind: {
- // Type of the bitmap (e.g. i59).
- IntegerType *MapTy = BitMap->getType();
-
- // Cast Index to the same type as the bitmap.
- // Note: The Index is <= the number of elements in the table, so
- // truncating it to the width of the bitmask is safe.
- Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
-
- // Multiply the shift amount by the element width.
- ShiftAmt = Builder.CreateMul(
- ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
- "switch.shiftamt");
-
- // Shift down.
- Value *DownShifted =
- Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
- // Mask off.
- return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
- }
- case ArrayKind: {
- // Make sure the table index will not overflow when treated as signed.
- IntegerType *IT = cast<IntegerType>(Index->getType());
- uint64_t TableSize =
- Array->getInitializer()->getType()->getArrayNumElements();
- if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
- Index = Builder.CreateZExt(
- Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
- "switch.tableidx.zext");
-
- Value *GEPIndices[] = {Builder.getInt32(0), Index};
- Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
- GEPIndices, "switch.gep");
- return Builder.CreateLoad(
- cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
- "switch.load");
- }
- }
- llvm_unreachable("Unknown lookup table kind!");
-}
-
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
- uint64_t TableSize,
- Type *ElementType) {
- auto *IT = dyn_cast<IntegerType>(ElementType);
- if (!IT)
- return false;
- // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
- // are <= 15, we could try to narrow the type.
-
- // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
- if (TableSize >= UINT_MAX / IT->getBitWidth())
- return false;
- return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
-}
-
-/// Determine whether a lookup table should be built for this switch, based on
-/// the number of cases, size of the table, and the types of the results.
-static bool
-ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
- const TargetTransformInfo &TTI, const DataLayout &DL,
- const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
- if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
- return false; // TableSize overflowed, or mul below might overflow.
-
- bool AllTablesFitInRegister = true;
- bool HasIllegalType = false;
- for (const auto &I : ResultTypes) {
- Type *Ty = I.second;
-
- // Saturate this flag to true.
- HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
-
- // Saturate this flag to false.
- AllTablesFitInRegister =
- AllTablesFitInRegister &&
- SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
-
- // If both flags saturate, we're done. NOTE: This *only* works with
- // saturating flags, and all flags have to saturate first due to the
- // non-deterministic behavior of iterating over a dense map.
- if (HasIllegalType && !AllTablesFitInRegister)
- break;
- }
-
- // If each table would fit in a register, we should build it anyway.
- if (AllTablesFitInRegister)
- return true;
-
- // Don't build a table that doesn't fit in-register if it has illegal types.
- if (HasIllegalType)
- return false;
-
- // The table density should be at least 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
- return SI->getNumCases() * 10 >= TableSize * 4;
-}
-
-/// Try to reuse the switch table index compare. Following pattern:
-/// \code
-/// if (idx < tablesize)
-/// r = table[idx]; // table does not contain default_value
-/// else
-/// r = default_value;
-/// if (r != default_value)
-/// ...
-/// \endcode
-/// Is optimized to:
-/// \code
-/// cond = idx < tablesize;
-/// if (cond)
-/// r = table[idx];
-/// else
-/// r = default_value;
-/// if (cond)
-/// ...
-/// \endcode
-/// Jump threading will then eliminate the second if(cond).
-static void reuseTableCompare(
- User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
- Constant *DefaultValue,
- const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
- ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
- if (!CmpInst)
- return;
-
- // We require that the compare is in the same block as the phi so that jump
- // threading can do its work afterwards.
- if (CmpInst->getParent() != PhiBlock)
- return;
-
- Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
- if (!CmpOp1)
- return;
-
- Value *RangeCmp = RangeCheckBranch->getCondition();
- Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
- Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
-
- // Check if the compare with the default value is constant true or false.
- Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
- DefaultValue, CmpOp1, true);
- if (DefaultConst != TrueConst && DefaultConst != FalseConst)
- return;
-
- // Check if the compare with the case values is distinct from the default
- // compare result.
- for (auto ValuePair : Values) {
- Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
- ValuePair.second, CmpOp1, true);
- if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
- return;
- assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
- "Expect true or false as compare result.");
- }
-
- // Check if the branch instruction dominates the phi node. It's a simple
- // dominance check, but sufficient for our needs.
- // Although this check is invariant in the calling loops, it's better to do it
- // at this late stage. Practically we do it at most once for a switch.
- BasicBlock *BranchBlock = RangeCheckBranch->getParent();
- for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
- return;
- }
-
- if (DefaultConst == FalseConst) {
- // The compare yields the same result. We can replace it.
- CmpInst->replaceAllUsesWith(RangeCmp);
- ++NumTableCmpReuses;
- } else {
- // The compare yields the same result, just inverted. We can replace it.
- Value *InvertedTableCmp = BinaryOperator::CreateXor(
- RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
- RangeCheckBranch);
- CmpInst->replaceAllUsesWith(InvertedTableCmp);
- ++NumTableCmpReuses;
- }
-}
-
-/// If the switch is only used to initialize one or more phi nodes in a common
-/// successor block with different constant values, replace the switch with
-/// lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
- assert(SI->getNumCases() > 1 && "Degenerate switch?");
-
- Function *Fn = SI->getParent()->getParent();
- // Only build lookup table when we have a target that supports it or the
- // attribute is not set.
- if (!TTI.shouldBuildLookupTables() ||
- (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
- return false;
-
- // FIXME: If the switch is too sparse for a lookup table, perhaps we could
- // split off a dense part and build a lookup table for that.
-
- // FIXME: This creates arrays of GEPs to constant strings, which means each
- // GEP needs a runtime relocation in PIC code. We should just build one big
- // string and lookup indices into that.
-
- // Ignore switches with less than three cases. Lookup tables will not make
- // them faster, so we don't analyze them.
- if (SI->getNumCases() < 3)
- return false;
-
- // Figure out the corresponding result for each case value and phi node in the
- // common destination, as well as the min and max case values.
- assert(!empty(SI->cases()));
- SwitchInst::CaseIt CI = SI->case_begin();
- ConstantInt *MinCaseVal = CI->getCaseValue();
- ConstantInt *MaxCaseVal = CI->getCaseValue();
-
- BasicBlock *CommonDest = nullptr;
-
- using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
- SmallDenseMap<PHINode *, ResultListTy> ResultLists;
-
- SmallDenseMap<PHINode *, Constant *> DefaultResults;
- SmallDenseMap<PHINode *, Type *> ResultTypes;
- SmallVector<PHINode *, 4> PHIs;
-
- for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
- ConstantInt *CaseVal = CI->getCaseValue();
- if (CaseVal->getValue().slt(MinCaseVal->getValue()))
- MinCaseVal = CaseVal;
- if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
- MaxCaseVal = CaseVal;
-
- // Resulting value at phi nodes for this case value.
- using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
- ResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
- Results, DL, TTI))
- return false;
-
- // Append the result from this case to the list for each phi.
- for (const auto &I : Results) {
- PHINode *PHI = I.first;
- Constant *Value = I.second;
- if (!ResultLists.count(PHI))
- PHIs.push_back(PHI);
- ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
- }
- }
-
- // Keep track of the result types.
- for (PHINode *PHI : PHIs) {
- ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
- }
-
- uint64_t NumResults = ResultLists[PHIs[0]].size();
- APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- bool TableHasHoles = (NumResults < TableSize);
-
- // If the table has holes, we need a constant result for the default case
- // or a bitmask that fits in a register.
- SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
- bool HasDefaultResults =
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, DL, TTI);
-
- bool NeedMask = (TableHasHoles && !HasDefaultResults);
- if (NeedMask) {
- // As an extra penalty for the validity test we require more cases.
- if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
- return false;
- if (!DL.fitsInLegalInteger(TableSize))
- return false;
- }
-
- for (const auto &I : DefaultResultsList) {
- PHINode *PHI = I.first;
- Constant *Result = I.second;
- DefaultResults[PHI] = Result;
- }
-
- if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
- return false;
-
- // Create the BB that does the lookups.
- Module &Mod = *CommonDest->getParent()->getParent();
- BasicBlock *LookupBB = BasicBlock::Create(
- Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
-
- // Compute the table index value.
- Builder.SetInsertPoint(SI);
- Value *TableIndex;
- if (MinCaseVal->isNullValue())
- TableIndex = SI->getCondition();
- else
- TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
- "switch.tableidx");
-
- // Compute the maximum table size representable by the integer type we are
- // switching upon.
- unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
- uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
- assert(MaxTableSize >= TableSize &&
- "It is impossible for a switch to have more entries than the max "
- "representable value of its input integer type's size.");
-
- // If the default destination is unreachable, or if the lookup table covers
- // all values of the conditional variable, branch directly to the lookup table
- // BB. Otherwise, check that the condition is within the case range.
- const bool DefaultIsReachable =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
- BranchInst *RangeCheckBranch = nullptr;
-
- if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
- Builder.CreateBr(LookupBB);
- // Note: We call removeProdecessor later since we need to be able to get the
- // PHI value for the default case in case we're using a bit mask.
- } else {
- Value *Cmp = Builder.CreateICmpULT(
- TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
- RangeCheckBranch =
- Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
- }
-
- // Populate the BB that does the lookups.
- Builder.SetInsertPoint(LookupBB);
-
- if (NeedMask) {
- // Before doing the lookup, we do the hole check. The LookupBB is therefore
- // re-purposed to do the hole check, and we create a new LookupBB.
- BasicBlock *MaskBB = LookupBB;
- MaskBB->setName("switch.hole_check");
- LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
- CommonDest->getParent(), CommonDest);
-
- // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
- // unnecessary illegal types.
- uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
- APInt MaskInt(TableSizePowOf2, 0);
- APInt One(TableSizePowOf2, 1);
- // Build bitmask; fill in a 1 bit for every case.
- const ResultListTy &ResultList = ResultLists[PHIs[0]];
- for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
- uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
- .getLimitedValue();
- MaskInt |= One << Idx;
- }
- ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
-
- // Get the TableIndex'th bit of the bitmask.
- // If this bit is 0 (meaning hole) jump to the default destination,
- // else continue with table lookup.
- IntegerType *MapTy = TableMask->getType();
- Value *MaskIndex =
- Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
- Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
- Value *LoBit = Builder.CreateTrunc(
- Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
- Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
-
- Builder.SetInsertPoint(LookupBB);
- AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
- }
-
- if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
- // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
- // do not delete PHINodes here.
- SI->getDefaultDest()->removePredecessor(SI->getParent(),
- /*KeepOneInputPHIs=*/true);
- }
-
- bool ReturnedEarly = false;
- for (PHINode *PHI : PHIs) {
- const ResultListTy &ResultList = ResultLists[PHI];
-
- // If using a bitmask, use any value to fill the lookup table holes.
- Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
- StringRef FuncName = Fn->getName();
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
- FuncName);
-
- Value *Result = Table.BuildLookup(TableIndex, Builder);
-
- // If the result is used to return immediately from the function, we want to
- // do that right here.
- if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
- PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
- Builder.CreateRet(Result);
- ReturnedEarly = true;
- break;
- }
-
- // Do a small peephole optimization: re-use the switch table compare if
- // possible.
- if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
- BasicBlock *PhiBlock = PHI->getParent();
- // Search for compare instructions which use the phi.
- for (auto *User : PHI->users()) {
- reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
- }
- }
-
- PHI->addIncoming(Result, LookupBB);
- }
-
- if (!ReturnedEarly)
- Builder.CreateBr(CommonDest);
-
- // Remove the switch.
- for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
- BasicBlock *Succ = SI->getSuccessor(i);
-
- if (Succ == SI->getDefaultDest())
- continue;
- Succ->removePredecessor(SI->getParent());
- }
- SI->eraseFromParent();
-
- ++NumLookupTables;
- if (NeedMask)
- ++NumLookupTablesHoles;
- return true;
-}
-
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
- // See also SelectionDAGBuilder::isDense(), which this function was based on.
- uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
- uint64_t Range = Diff + 1;
- uint64_t NumCases = Values.size();
- // 40% is the default density for building a jump table in optsize/minsize mode.
- uint64_t MinDensity = 40;
-
- return NumCases * 100 >= Range * MinDensity;
-}
-
-/// Try to transform a switch that has "holes" in it to a contiguous sequence
-/// of cases.
-///
-/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
-/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
-///
-/// This converts a sparse switch into a dense switch which allows better
-/// lowering and could also allow transforming into a lookup table.
-static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout &DL,
- const TargetTransformInfo &TTI) {
- auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
- if (CondTy->getIntegerBitWidth() > 64 ||
- !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
- return false;
- // Only bother with this optimization if there are more than 3 switch cases;
- // SDAG will only bother creating jump tables for 4 or more cases.
- if (SI->getNumCases() < 4)
- return false;
-
- // This transform is agnostic to the signedness of the input or case values. We
- // can treat the case values as signed or unsigned. We can optimize more common
- // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
- // as signed.
- SmallVector<int64_t,4> Values;
- for (auto &C : SI->cases())
- Values.push_back(C.getCaseValue()->getValue().getSExtValue());
- llvm::sort(Values);
-
- // If the switch is already dense, there's nothing useful to do here.
- if (isSwitchDense(Values))
- return false;
-
- // First, transform the values such that they start at zero and ascend.
- int64_t Base = Values[0];
- for (auto &V : Values)
- V -= (uint64_t)(Base);
-
- // Now we have signed numbers that have been shifted so that, given enough
- // precision, there are no negative values. Since the rest of the transform
- // is bitwise only, we switch now to an unsigned representation.
-
- // This transform can be done speculatively because it is so cheap - it
- // results in a single rotate operation being inserted.
- // FIXME: It's possible that optimizing a switch on powers of two might also
- // be beneficial - flag values are often powers of two and we could use a CLZ
- // as the key function.
-
- // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
- // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
- // less than 64.
- unsigned Shift = 64;
- for (auto &V : Values)
- Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
- assert(Shift < 64);
- if (Shift > 0)
- for (auto &V : Values)
- V = (int64_t)((uint64_t)V >> Shift);
-
- if (!isSwitchDense(Values))
- // Transform didn't create a dense switch.
- return false;
-
- // The obvious transform is to shift the switch condition right and emit a
- // check that the condition actually cleanly divided by GCD, i.e.
- // C & (1 << Shift - 1) == 0
- // inserting a new CFG edge to handle the case where it didn't divide cleanly.
- //
- // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
- // shift and puts the shifted-off bits in the uppermost bits. If any of these
- // are nonzero then the switch condition will be very large and will hit the
- // default case.
-
- auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
- Builder.SetInsertPoint(SI);
- auto *ShiftC = ConstantInt::get(Ty, Shift);
- auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
- auto *LShr = Builder.CreateLShr(Sub, ShiftC);
- auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
- auto *Rot = Builder.CreateOr(LShr, Shl);
- SI->replaceUsesOfWith(SI->getCondition(), Rot);
-
- for (auto Case : SI->cases()) {
- auto *Orig = Case.getCaseValue();
- auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- Case.setValue(
- cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
- }
- return true;
-}
-
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
- BasicBlock *BB = SI->getParent();
-
- if (isValueEqualityComparison(SI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return requestResimplify();
-
- Value *Cond = SI->getCondition();
- if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
- if (SimplifySwitchOnSelect(SI, Select))
- return requestResimplify();
-
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- if (SI == &*BB->instructionsWithoutDebug().begin())
- if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return requestResimplify();
- }
-
- // Try to transform the switch into an icmp and a branch.
- if (TurnSwitchRangeIntoICmp(SI, Builder))
- return requestResimplify();
-
- // Remove unreachable cases.
- if (eliminateDeadSwitchCases(SI, Options.AC, DL))
- return requestResimplify();
-
- if (switchToSelect(SI, Builder, DL, TTI))
- return requestResimplify();
-
- if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
- return requestResimplify();
-
- // The conversion from switch to lookup tables results in difficult-to-analyze
- // code and makes pruning branches much harder. This is a problem if the
- // switch expression itself can still be restricted as a result of inlining or
- // CVP. Therefore, only apply this transformation during late stages of the
- // optimisation pipeline.
- if (Options.ConvertSwitchToLookupTable &&
- SwitchToLookupTable(SI, Builder, DL, TTI))
- return requestResimplify();
-
- if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return requestResimplify();
-
- return false;
-}
-
-bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
- BasicBlock *BB = IBI->getParent();
- bool Changed = false;
-
- // Eliminate redundant destinations.
- SmallPtrSet<Value *, 8> Succs;
- for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- BasicBlock *Dest = IBI->getDestination(i);
- if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
- Dest->removePredecessor(BB);
- IBI->removeDestination(i);
- --i;
- --e;
- Changed = true;
- }
- }
-
- if (IBI->getNumDestinations() == 0) {
- // If the indirectbr has no successors, change it to unreachable.
- new UnreachableInst(IBI->getContext(), IBI);
- EraseTerminatorAndDCECond(IBI);
- return true;
- }
-
- if (IBI->getNumDestinations() == 1) {
- // If the indirectbr has one successor, change it to a direct branch.
- BranchInst::Create(IBI->getDestination(0), IBI);
- EraseTerminatorAndDCECond(IBI);
- return true;
- }
-
- if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
- if (SimplifyIndirectBrOnSelect(IBI, SI))
- return requestResimplify();
- }
- return Changed;
-}
-
-/// Given an block with only a single landing pad and a unconditional branch
-/// try to find another basic block which this one can be merged with. This
-/// handles cases where we have multiple invokes with unique landing pads, but
-/// a shared handler.
-///
-/// We specifically choose to not worry about merging non-empty blocks
-/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
-/// practice, the optimizer produces empty landing pad blocks quite frequently
-/// when dealing with exception dense code. (see: instcombine, gvn, if-else
-/// sinking in this file)
-///
-/// This is primarily a code size optimization. We need to avoid performing
-/// any transform which might inhibit optimization (such as our ability to
-/// specialize a particular handler via tail commoning). We do this by not
-/// merging any blocks which require us to introduce a phi. Since the same
-/// values are flowing through both blocks, we don't lose any ability to
-/// specialize. If anything, we make such specialization more likely.
-///
-/// TODO - This transformation could remove entries from a phi in the target
-/// block when the inputs in the phi are the same for the two blocks being
-/// merged. In some cases, this could result in removal of the PHI entirely.
-static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
- BasicBlock *BB) {
- auto Succ = BB->getUniqueSuccessor();
- assert(Succ);
- // If there's a phi in the successor block, we'd likely have to introduce
- // a phi into the merged landing pad block.
- if (isa<PHINode>(*Succ->begin()))
- return false;
-
- for (BasicBlock *OtherPred : predecessors(Succ)) {
- if (BB == OtherPred)
- continue;
- BasicBlock::iterator I = OtherPred->begin();
- LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
- if (!LPad2 || !LPad2->isIdenticalTo(LPad))
- continue;
- for (++I; isa<DbgInfoIntrinsic>(I); ++I)
- ;
- BranchInst *BI2 = dyn_cast<BranchInst>(I);
- if (!BI2 || !BI2->isIdenticalTo(BI))
- continue;
-
- // We've found an identical block. Update our predecessors to take that
- // path instead and make ourselves dead.
- SmallPtrSet<BasicBlock *, 16> Preds;
- Preds.insert(pred_begin(BB), pred_end(BB));
- for (BasicBlock *Pred : Preds) {
- InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
- assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
- "unexpected successor");
- II->setUnwindDest(OtherPred);
- }
-
- // The debug info in OtherPred doesn't cover the merged control flow that
- // used to go through BB. We need to delete it or update it.
- for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
- Instruction &Inst = *I;
- I++;
- if (isa<DbgInfoIntrinsic>(Inst))
- Inst.eraseFromParent();
- }
-
- SmallPtrSet<BasicBlock *, 16> Succs;
- Succs.insert(succ_begin(BB), succ_end(BB));
- for (BasicBlock *Succ : Succs) {
- Succ->removePredecessor(BB);
- }
-
- IRBuilder<> Builder(BI);
- Builder.CreateUnreachable();
- BI->eraseFromParent();
- return true;
- }
- return false;
-}
-
-bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
- IRBuilder<> &Builder) {
- BasicBlock *BB = BI->getParent();
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // If the Terminator is the only non-phi instruction, simplify the block.
- // If LoopHeader is provided, check if the block or its successor is a loop
- // header. (This is for early invocations before loop simplify and
- // vectorization to keep canonical loop forms for nested loops. These blocks
- // can be eliminated when the pass is invoked later in the back-end.)
- // Note that if BB has only one predecessor then we do not introduce new
- // backedge, so we can eliminate BB.
- bool NeedCanonicalLoop =
- Options.NeedCanonicalLoop &&
- (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
- (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
- if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
- !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
-
- // If the only instruction in the block is a seteq/setne comparison against a
- // constant, try to simplify the block.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
- if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
- for (++I; isa<DbgInfoIntrinsic>(I); ++I)
- ;
- if (I->isTerminator() &&
- tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
- return true;
- }
-
- // See if we can merge an empty landing pad block with another which is
- // equivalent.
- if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
- for (++I; isa<DbgInfoIntrinsic>(I); ++I)
- ;
- if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
- return true;
- }
-
- // If this basic block is ONLY a compare and a branch, and if a predecessor
- // branches to us and our successor, fold the comparison into the
- // predecessor and use logical operations to update the incoming value
- // for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
- return requestResimplify();
- return false;
-}
-
-static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
- BasicBlock *PredPred = nullptr;
- for (auto *P : predecessors(BB)) {
- BasicBlock *PPred = P->getSinglePredecessor();
- if (!PPred || (PredPred && PredPred != PPred))
- return nullptr;
- PredPred = PPred;
- }
- return PredPred;
-}
-
-bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
- BasicBlock *BB = BI->getParent();
- const Function *Fn = BB->getParent();
- if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
- return false;
-
- // Conditional branch
- if (isValueEqualityComparison(BI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this
- // switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return requestResimplify();
-
- // This block must be empty, except for the setcond inst, if it exists.
- // Ignore dbg intrinsics.
- auto I = BB->instructionsWithoutDebug().begin();
- if (&*I == BI) {
- if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return requestResimplify();
- } else if (&*I == cast<Instruction>(BI->getCondition())) {
- ++I;
- if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return requestResimplify();
- }
- }
-
- // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, Builder, DL))
- return true;
-
- // If this basic block has dominating predecessor blocks and the dominating
- // blocks' conditions imply BI's condition, we know the direction of BI.
- Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
- if (Imp) {
- // Turn this into a branch on constant.
- auto *OldCond = BI->getCondition();
- ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
- : ConstantInt::getFalse(BB->getContext());
- BI->setCondition(TorF);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return requestResimplify();
- }
-
- // If this basic block is ONLY a compare and a branch, and if a predecessor
- // branches to us and one of our successors, fold the comparison into the
- // predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
- return requestResimplify();
-
- // We have a conditional branch to two blocks that are only reachable
- // from BI. We know that the condbr dominates the two blocks, so see if
- // there is any identical code in the "then" and "else" blocks. If so, we
- // can hoist it up to the branching block.
- if (BI->getSuccessor(0)->getSinglePredecessor()) {
- if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, TTI))
- return requestResimplify();
- } else {
- // If Successor #1 has multiple preds, we may be able to conditionally
- // execute Successor #0 if it branches to Successor #1.
- Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
- if (Succ0TI->getNumSuccessors() == 1 &&
- Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
- return requestResimplify();
- }
- } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
- // If Successor #0 has multiple preds, we may be able to conditionally
- // execute Successor #1 if it branches to Successor #0.
- Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
- if (Succ1TI->getNumSuccessors() == 1 &&
- Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
- return requestResimplify();
- }
-
- // If this is a branch on a phi node in the current block, thread control
- // through this block if any PHI node entries are constants.
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
- if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL, Options.AC))
- return requestResimplify();
-
- // Scan predecessor blocks for conditional branches.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
- return requestResimplify();
-
- // Look for diamond patterns.
- if (MergeCondStores)
- if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
- if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
- if (PBI != BI && PBI->isConditional())
- if (mergeConditionalStores(PBI, BI, DL))
- return requestResimplify();
-
- return false;
-}
-
-/// Check if passing a value to an instruction will cause undefined behavior.
-static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
- Constant *C = dyn_cast<Constant>(V);
- if (!C)
- return false;
-
- if (I->use_empty())
- return false;
-
- if (C->isNullValue() || isa<UndefValue>(C)) {
- // Only look at the first use, avoid hurting compile time with long uselists
- User *Use = *I->user_begin();
-
- // Now make sure that there are no instructions in between that can alter
- // control flow (eg. calls)
- for (BasicBlock::iterator
- i = ++BasicBlock::iterator(I),
- UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
- i != UI; ++i)
- if (i == I->getParent()->end() || i->mayHaveSideEffects())
- return false;
-
- // Look through GEPs. A load from a GEP derived from NULL is still undefined
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
- if (GEP->getPointerOperand() == I)
- return passingValueIsAlwaysUndefined(V, GEP);
-
- // Look through bitcasts.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
- return passingValueIsAlwaysUndefined(V, BC);
-
- // Load from null is undefined.
- if (LoadInst *LI = dyn_cast<LoadInst>(Use))
- if (!LI->isVolatile())
- return !NullPointerIsDefined(LI->getFunction(),
- LI->getPointerAddressSpace());
-
- // Store to null is undefined.
- if (StoreInst *SI = dyn_cast<StoreInst>(Use))
- if (!SI->isVolatile())
- return (!NullPointerIsDefined(SI->getFunction(),
- SI->getPointerAddressSpace())) &&
- SI->getPointerOperand() == I;
-
- // A call to null is undefined.
- if (auto CS = CallSite(Use))
- return !NullPointerIsDefined(CS->getFunction()) &&
- CS.getCalledValue() == I;
- }
- return false;
-}
-
-/// If BB has an incoming value that will always trigger undefined behavior
-/// (eg. null pointer dereference), remove the branch leading here.
-static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
- for (PHINode &PHI : BB->phis())
- for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
- if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
- IRBuilder<> Builder(T);
- if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
- BB->removePredecessor(PHI.getIncomingBlock(i));
- // Turn uncoditional branches into unreachables and remove the dead
- // destination from conditional branches.
- if (BI->isUnconditional())
- Builder.CreateUnreachable();
- else
- Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
- : BI->getSuccessor(0));
- BI->eraseFromParent();
- return true;
- }
- // TODO: SwitchInst.
- }
-
- return false;
-}
-
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
- bool Changed = false;
-
- assert(BB && BB->getParent() && "Block not embedded in function!");
- assert(BB->getTerminator() && "Degenerate basic block encountered!");
-
- // Remove basic blocks that have no predecessors (except the entry block)...
- // or that just have themself as a predecessor. These are unreachable.
- if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
- BB->getSinglePredecessor() == BB) {
- LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
- DeleteDeadBlock(BB);
- return true;
- }
-
- // Check to see if we can constant propagate this terminator instruction
- // away...
- Changed |= ConstantFoldTerminator(BB, true);
-
- // Check for and eliminate duplicate PHI nodes in this block.
- Changed |= EliminateDuplicatePHINodes(BB);
-
- // Check for and remove branches that will always cause undefined behavior.
- Changed |= removeUndefIntroducingPredecessor(BB);
-
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- if (MergeBlockIntoPredecessor(BB))
- return true;
-
- if (SinkCommon && Options.SinkCommonInsts)
- Changed |= SinkCommonCodeFromPredecessors(BB);
-
- IRBuilder<> Builder(BB);
-
- // If there is a trivial two-entry PHI node in this basic block, and we can
- // eliminate it, do so now.
- if (auto *PN = dyn_cast<PHINode>(BB->begin()))
- if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
-
- Builder.SetInsertPoint(BB->getTerminator());
- if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (BI->isUnconditional()) {
- if (SimplifyUncondBranch(BI, Builder))
- return true;
- } else {
- if (SimplifyCondBranch(BI, Builder))
- return true;
- }
- } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (SimplifyReturn(RI, Builder))
- return true;
- } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
- if (SimplifyResume(RI, Builder))
- return true;
- } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
- if (SimplifyCleanupReturn(RI))
- return true;
- } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (SimplifySwitch(SI, Builder))
- return true;
- } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {
- if (SimplifyUnreachable(UI))
- return true;
- } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {
- if (SimplifyIndirectBr(IBI))
- return true;
- }
-
- return Changed;
-}
-
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
- bool Changed = false;
-
- // Repeated simplify BB as long as resimplification is requested.
- do {
- Resimplify = false;
-
- // Perform one round of simplifcation. Resimplify flag will be set if
- // another iteration is requested.
- Changed |= simplifyOnce(BB);
- } while (Resimplify);
-
- return Changed;
-}
-
-bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const SimplifyCFGOptions &Options,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
- return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
- Options)
- .run(BB);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
deleted file mode 100644
index cbb114f9a47a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ /dev/null
@@ -1,957 +0,0 @@
-//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements induction variable simplification. It does
-// not define any actual pass or policy, but provides a single function to
-// simplify a loop's induction variables based on ScalarEvolution.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "indvars"
-
-STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
-STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
-STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(
- NumSimplifiedSDiv,
- "Number of IV signed division operations converted to unsigned division");
-STATISTIC(
- NumSimplifiedSRem,
- "Number of IV signed remainder operations converted to unsigned remainder");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-namespace {
- /// This is a utility for simplifying induction variables
- /// based on ScalarEvolution. It is the primary instrument of the
- /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
- /// other loop passes that preserve SCEV.
- class SimplifyIndvar {
- Loop *L;
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- SCEVExpander &Rewriter;
- SmallVectorImpl<WeakTrackingVH> &DeadInsts;
-
- bool Changed;
-
- public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakTrackingVH> &Dead)
- : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead),
- Changed(false) {
- assert(LI && "IV simplification requires LoopInfo");
- }
-
- bool hasChanged() const { return Changed; }
-
- /// Iteratively perform simplification on a worklist of users of the
- /// specified induction variable. This is the top-level driver that applies
- /// all simplifications to users of an IV.
- void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
-
- Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
-
- bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
- bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
-
- bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
- bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
- bool eliminateTrunc(TruncInst *TI);
- bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
- void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
- bool IsSigned);
- void replaceRemWithNumerator(BinaryOperator *Rem);
- void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
- void replaceSRemWithURem(BinaryOperator *Rem);
- bool eliminateSDiv(BinaryOperator *SDiv);
- bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
- bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
- };
-}
-
-/// Fold an IV operand into its use. This removes increments of an
-/// aligned IV when used by a instruction that ignores the low bits.
-///
-/// IVOperand is guaranteed SCEVable, but UseInst may not be.
-///
-/// Return the operand of IVOperand for this induction variable if IVOperand can
-/// be folded (in case more folding opportunities have been exposed).
-/// Otherwise return null.
-Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
- Value *IVSrc = nullptr;
- const unsigned OperIdx = 0;
- const SCEV *FoldedExpr = nullptr;
- bool MustDropExactFlag = false;
- switch (UseInst->getOpcode()) {
- default:
- return nullptr;
- case Instruction::UDiv:
- case Instruction::LShr:
- // We're only interested in the case where we know something about
- // the numerator and have a constant denominator.
- if (IVOperand != UseInst->getOperand(OperIdx) ||
- !isa<ConstantInt>(UseInst->getOperand(1)))
- return nullptr;
-
- // Attempt to fold a binary operator with constant operand.
- // e.g. ((I + 1) >> 2) => I >> 2
- if (!isa<BinaryOperator>(IVOperand)
- || !isa<ConstantInt>(IVOperand->getOperand(1)))
- return nullptr;
-
- IVSrc = IVOperand->getOperand(0);
- // IVSrc must be the (SCEVable) IV, since the other operand is const.
- assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
-
- ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
- if (UseInst->getOpcode() == Instruction::LShr) {
- // Get a constant for the divisor. See createSCEV.
- uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
- if (D->getValue().uge(BitWidth))
- return nullptr;
-
- D = ConstantInt::get(UseInst->getContext(),
- APInt::getOneBitSet(BitWidth, D->getZExtValue()));
- }
- FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
- // We might have 'exact' flag set at this point which will no longer be
- // correct after we make the replacement.
- if (UseInst->isExact() &&
- SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
- MustDropExactFlag = true;
- }
- // We have something that might fold it's operand. Compare SCEVs.
- if (!SE->isSCEVable(UseInst->getType()))
- return nullptr;
-
- // Bypass the operand if SCEV can prove it has no effect.
- if (SE->getSCEV(UseInst) != FoldedExpr)
- return nullptr;
-
- LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
- << " -> " << *UseInst << '\n');
-
- UseInst->setOperand(OperIdx, IVSrc);
- assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
-
- if (MustDropExactFlag)
- UseInst->dropPoisonGeneratingFlags();
-
- ++NumElimOperand;
- Changed = true;
- if (IVOperand->use_empty())
- DeadInsts.emplace_back(IVOperand);
- return IVSrc;
-}
-
-bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
- Value *IVOperand) {
- unsigned IVOperIdx = 0;
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- if (IVOperand != ICmp->getOperand(0)) {
- // Swapped
- assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
- IVOperIdx = 1;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- // Get the SCEVs for the ICmp operands (in the specific context of the
- // current loop)
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
- const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
-
- ICmpInst::Predicate InvariantPredicate;
- const SCEV *InvariantLHS, *InvariantRHS;
-
- auto *PN = dyn_cast<PHINode>(IVOperand);
- if (!PN)
- return false;
- if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate,
- InvariantLHS, InvariantRHS))
- return false;
-
- // Rewrite the comparison to a loop invariant comparison if it can be done
- // cheaply, where cheaply means "we don't need to emit any new
- // instructions".
-
- SmallDenseMap<const SCEV*, Value*> CheapExpansions;
- CheapExpansions[S] = ICmp->getOperand(IVOperIdx);
- CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx);
-
- // TODO: Support multiple entry loops? (We currently bail out of these in
- // the IndVarSimplify pass)
- if (auto *BB = L->getLoopPredecessor()) {
- const int Idx = PN->getBasicBlockIndex(BB);
- if (Idx >= 0) {
- Value *Incoming = PN->getIncomingValue(Idx);
- const SCEV *IncomingS = SE->getSCEV(Incoming);
- CheapExpansions[IncomingS] = Incoming;
- }
- }
- Value *NewLHS = CheapExpansions[InvariantLHS];
- Value *NewRHS = CheapExpansions[InvariantRHS];
-
- if (!NewLHS)
- if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS))
- NewLHS = ConstLHS->getValue();
- if (!NewRHS)
- if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS))
- NewRHS = ConstRHS->getValue();
-
- if (!NewLHS || !NewRHS)
- // We could not find an existing value to replace either LHS or RHS.
- // Generating new instructions has subtler tradeoffs, so avoid doing that
- // for now.
- return false;
-
- LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
- ICmp->setPredicate(InvariantPredicate);
- ICmp->setOperand(0, NewLHS);
- ICmp->setOperand(1, NewRHS);
- return true;
-}
-
-/// SimplifyIVUsers helper for eliminating useless
-/// comparisons against an induction variable.
-void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
- unsigned IVOperIdx = 0;
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- ICmpInst::Predicate OriginalPred = Pred;
- if (IVOperand != ICmp->getOperand(0)) {
- // Swapped
- assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
- IVOperIdx = 1;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- // Get the SCEVs for the ICmp operands (in the specific context of the
- // current loop)
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
- const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
-
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X)) {
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- DeadInsts.emplace_back(ICmp);
- LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- DeadInsts.emplace_back(ICmp);
- LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
- // fallthrough to end of function
- } else if (ICmpInst::isSigned(OriginalPred) &&
- SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
- // If we were unable to make anything above, all we can is to canonicalize
- // the comparison hoping that it will open the doors for other
- // optimizations. If we find out that we compare two non-negative values,
- // we turn the instruction's predicate to its unsigned version. Note that
- // we cannot rely on Pred here unless we check if we have swapped it.
- assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
- LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
- << '\n');
- ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
- } else
- return;
-
- ++NumElimCmp;
- Changed = true;
-}
-
-bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
- // Get the SCEVs for the ICmp operands.
- auto *N = SE->getSCEV(SDiv->getOperand(0));
- auto *D = SE->getSCEV(SDiv->getOperand(1));
-
- // Simplify unnecessary loops away.
- const Loop *L = LI->getLoopFor(SDiv->getParent());
- N = SE->getSCEVAtScope(N, L);
- D = SE->getSCEVAtScope(D, L);
-
- // Replace sdiv by udiv if both of the operands are non-negative
- if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
- auto *UDiv = BinaryOperator::Create(
- BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
- SDiv->getName() + ".udiv", SDiv);
- UDiv->setIsExact(SDiv->isExact());
- SDiv->replaceAllUsesWith(UDiv);
- LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
- ++NumSimplifiedSDiv;
- Changed = true;
- DeadInsts.push_back(SDiv);
- return true;
- }
-
- return false;
-}
-
-// i %s n -> i %u n if i >= 0 and n >= 0
-void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
- auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
- auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
- Rem->getName() + ".urem", Rem);
- Rem->replaceAllUsesWith(URem);
- LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
- ++NumSimplifiedSRem;
- Changed = true;
- DeadInsts.emplace_back(Rem);
-}
-
-// i % n --> i if i is in [0,n).
-void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.emplace_back(Rem);
-}
-
-// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
-void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
- auto *T = Rem->getType();
- auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
- SelectInst *Sel =
- SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
- Rem->replaceAllUsesWith(Sel);
- LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.emplace_back(Rem);
-}
-
-/// SimplifyIVUsers helper for eliminating useless remainder operations
-/// operating on an induction variable or replacing srem by urem.
-void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
- bool IsSigned) {
- auto *NValue = Rem->getOperand(0);
- auto *DValue = Rem->getOperand(1);
- // We're only interested in the case where we know something about
- // the numerator, unless it is a srem, because we want to replace srem by urem
- // in general.
- bool UsedAsNumerator = IVOperand == NValue;
- if (!UsedAsNumerator && !IsSigned)
- return;
-
- const SCEV *N = SE->getSCEV(NValue);
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- N = SE->getSCEVAtScope(N, ICmpLoop);
-
- bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
-
- // Do not proceed if the Numerator may be negative
- if (!IsNumeratorNonNegative)
- return;
-
- const SCEV *D = SE->getSCEV(DValue);
- D = SE->getSCEVAtScope(D, ICmpLoop);
-
- if (UsedAsNumerator) {
- auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- if (SE->isKnownPredicate(LT, N, D)) {
- replaceRemWithNumerator(Rem);
- return;
- }
-
- auto *T = Rem->getType();
- const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
- if (SE->isKnownPredicate(LT, NLessOne, D)) {
- replaceRemWithNumeratorOrZero(Rem);
- return;
- }
- }
-
- // Try to replace SRem with URem, if both N and D are known non-negative.
- // Since we had already check N, we only need to check D now
- if (!IsSigned || !SE->isKnownNonNegative(D))
- return;
-
- replaceSRemWithURem(Rem);
-}
-
-static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
- bool Signed, const SCEV *LHS, const SCEV *RHS) {
- const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
- SCEV::NoWrapFlags, unsigned);
- switch (BinOp) {
- default:
- llvm_unreachable("Unsupported binary op");
- case Instruction::Add:
- Operation = &ScalarEvolution::getAddExpr;
- break;
- case Instruction::Sub:
- Operation = &ScalarEvolution::getMinusSCEV;
- break;
- case Instruction::Mul:
- Operation = &ScalarEvolution::getMulExpr;
- break;
- }
-
- const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
- Signed ? &ScalarEvolution::getSignExtendExpr
- : &ScalarEvolution::getZeroExtendExpr;
-
- // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
- auto *NarrowTy = cast<IntegerType>(LHS->getType());
- auto *WideTy =
- IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
-
- const SCEV *A =
- (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0),
- WideTy, 0);
- const SCEV *B =
- (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
- (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
- return A == B;
-}
-
-bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
- const SCEV *LHS = SE->getSCEV(WO->getLHS());
- const SCEV *RHS = SE->getSCEV(WO->getRHS());
- if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
- return false;
-
- // Proved no overflow, nuke the overflow check and, if possible, the overflow
- // intrinsic as well.
-
- BinaryOperator *NewResult = BinaryOperator::Create(
- WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
-
- if (WO->isSigned())
- NewResult->setHasNoSignedWrap(true);
- else
- NewResult->setHasNoUnsignedWrap(true);
-
- SmallVector<ExtractValueInst *, 4> ToDelete;
-
- for (auto *U : WO->users()) {
- if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
- if (EVI->getIndices()[0] == 1)
- EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
- else {
- assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
- EVI->replaceAllUsesWith(NewResult);
- }
- ToDelete.push_back(EVI);
- }
- }
-
- for (auto *EVI : ToDelete)
- EVI->eraseFromParent();
-
- if (WO->use_empty())
- WO->eraseFromParent();
-
- return true;
-}
-
-bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
- const SCEV *LHS = SE->getSCEV(SI->getLHS());
- const SCEV *RHS = SE->getSCEV(SI->getRHS());
- if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
- return false;
-
- BinaryOperator *BO = BinaryOperator::Create(
- SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
- if (SI->isSigned())
- BO->setHasNoSignedWrap();
- else
- BO->setHasNoUnsignedWrap();
-
- SI->replaceAllUsesWith(BO);
- DeadInsts.emplace_back(SI);
- Changed = true;
- return true;
-}
-
-bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
- // It is always legal to replace
- // icmp <pred> i32 trunc(iv), n
- // with
- // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
- // Or with
- // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
- // Or with either of these if pred is an equality predicate.
- //
- // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
- // every comparison which uses trunc, it means that we can replace each of
- // them with comparison of iv against sext/zext(n). We no longer need trunc
- // after that.
- //
- // TODO: Should we do this if we can widen *some* comparisons, but not all
- // of them? Sometimes it is enough to enable other optimizations, but the
- // trunc instruction will stay in the loop.
- Value *IV = TI->getOperand(0);
- Type *IVTy = IV->getType();
- const SCEV *IVSCEV = SE->getSCEV(IV);
- const SCEV *TISCEV = SE->getSCEV(TI);
-
- // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
- // get rid of trunc
- bool DoesSExtCollapse = false;
- bool DoesZExtCollapse = false;
- if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
- DoesSExtCollapse = true;
- if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
- DoesZExtCollapse = true;
-
- // If neither sext nor zext does collapse, it is not profitable to do any
- // transform. Bail.
- if (!DoesSExtCollapse && !DoesZExtCollapse)
- return false;
-
- // Collect users of the trunc that look like comparisons against invariants.
- // Bail if we find something different.
- SmallVector<ICmpInst *, 4> ICmpUsers;
- for (auto *U : TI->users()) {
- // We don't care about users in unreachable blocks.
- if (isa<Instruction>(U) &&
- !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
- continue;
- ICmpInst *ICI = dyn_cast<ICmpInst>(U);
- if (!ICI) return false;
- assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
- if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
- !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
- return false;
- // If we cannot get rid of trunc, bail.
- if (ICI->isSigned() && !DoesSExtCollapse)
- return false;
- if (ICI->isUnsigned() && !DoesZExtCollapse)
- return false;
- // For equality, either signed or unsigned works.
- ICmpUsers.push_back(ICI);
- }
-
- auto CanUseZExt = [&](ICmpInst *ICI) {
- // Unsigned comparison can be widened as unsigned.
- if (ICI->isUnsigned())
- return true;
- // Is it profitable to do zext?
- if (!DoesZExtCollapse)
- return false;
- // For equality, we can safely zext both parts.
- if (ICI->isEquality())
- return true;
- // Otherwise we can only use zext when comparing two non-negative or two
- // negative values. But in practice, we will never pass DoesZExtCollapse
- // check for a negative value, because zext(trunc(x)) is non-negative. So
- // it only make sense to check for non-negativity here.
- const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
- const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
- return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
- };
- // Replace all comparisons against trunc with comparisons against IV.
- for (auto *ICI : ICmpUsers) {
- bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
- auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
- Instruction *Ext = nullptr;
- // For signed/unsigned predicate, replace the old comparison with comparison
- // of immediate IV against sext/zext of the invariant argument. If we can
- // use either sext or zext (i.e. we are dealing with equality predicate),
- // then prefer zext as a more canonical form.
- // TODO: If we see a signed comparison which can be turned into unsigned,
- // we can do it here for canonicalization purposes.
- ICmpInst::Predicate Pred = ICI->getPredicate();
- if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
- if (CanUseZExt(ICI)) {
- assert(DoesZExtCollapse && "Unprofitable zext?");
- Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
- Pred = ICmpInst::getUnsignedPredicate(Pred);
- } else {
- assert(DoesSExtCollapse && "Unprofitable sext?");
- Ext = new SExtInst(Op1, IVTy, "sext", ICI);
- assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
- }
- bool Changed;
- L->makeLoopInvariant(Ext, Changed);
- (void)Changed;
- ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
- ICI->replaceAllUsesWith(NewICI);
- DeadInsts.emplace_back(ICI);
- }
-
- // Trunc no longer needed.
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- DeadInsts.emplace_back(TI);
- return true;
-}
-
-/// Eliminate an operation that consumes a simple IV and has no observable
-/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
-/// but UseInst may not be.
-bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
- Instruction *IVOperand) {
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- eliminateIVComparison(ICmp, IVOperand);
- return true;
- }
- if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSRem = Bin->getOpcode() == Instruction::SRem;
- if (IsSRem || Bin->getOpcode() == Instruction::URem) {
- simplifyIVRemainder(Bin, IVOperand, IsSRem);
- return true;
- }
-
- if (Bin->getOpcode() == Instruction::SDiv)
- return eliminateSDiv(Bin);
- }
-
- if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
- if (eliminateOverflowIntrinsic(WO))
- return true;
-
- if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
- if (eliminateSaturatingIntrinsic(SI))
- return true;
-
- if (auto *TI = dyn_cast<TruncInst>(UseInst))
- if (eliminateTrunc(TI))
- return true;
-
- if (eliminateIdentitySCEV(UseInst, IVOperand))
- return true;
-
- return false;
-}
-
-static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
- if (auto *BB = L->getLoopPreheader())
- return BB->getTerminator();
-
- return Hint;
-}
-
-/// Replace the UseInst with a constant if possible.
-bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
- if (!SE->isSCEVable(I->getType()))
- return false;
-
- // Get the symbolic expression for this instruction.
- const SCEV *S = SE->getSCEV(I);
-
- if (!SE->isLoopInvariant(S, L))
- return false;
-
- // Do not generate something ridiculous even if S is loop invariant.
- if (Rewriter.isHighCostExpansion(S, L, I))
- return false;
-
- auto *IP = GetLoopInvariantInsertPosition(L, I);
- auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
-
- I->replaceAllUsesWith(Invariant);
- LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
- << " with loop invariant: " << *S << '\n');
- ++NumFoldedUser;
- Changed = true;
- DeadInsts.emplace_back(I);
- return true;
-}
-
-/// Eliminate any operation that SCEV can prove is an identity function.
-bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
- Instruction *IVOperand) {
- if (!SE->isSCEVable(UseInst->getType()) ||
- (UseInst->getType() != IVOperand->getType()) ||
- (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
- return false;
-
- // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
- // dominator tree, even if X is an operand to Y. For instance, in
- //
- // %iv = phi i32 {0,+,1}
- // br %cond, label %left, label %merge
- //
- // left:
- // %X = add i32 %iv, 0
- // br label %merge
- //
- // merge:
- // %M = phi (%X, %iv)
- //
- // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
- // %M.replaceAllUsesWith(%X) would be incorrect.
-
- if (isa<PHINode>(UseInst))
- // If UseInst is not a PHI node then we know that IVOperand dominates
- // UseInst directly from the legality of SSA.
- if (!DT || !DT->dominates(IVOperand, UseInst))
- return false;
-
- if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
- return false;
-
- LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
-
- UseInst->replaceAllUsesWith(IVOperand);
- ++NumElimIdentity;
- Changed = true;
- DeadInsts.emplace_back(UseInst);
- return true;
-}
-
-/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
-/// unsigned-overflow. Returns true if anything changed, false otherwise.
-bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
- Value *IVOperand) {
- // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
- return false;
-
- if (BO->getOpcode() != Instruction::Add &&
- BO->getOpcode() != Instruction::Sub &&
- BO->getOpcode() != Instruction::Mul)
- return false;
-
- const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
- const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
- bool Changed = false;
-
- if (!BO->hasNoUnsignedWrap() &&
- willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
- BO->setHasNoUnsignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
-
- if (!BO->hasNoSignedWrap() &&
- willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
- BO->setHasNoSignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
-
- return Changed;
-}
-
-/// Annotate the Shr in (X << IVOperand) >> C as exact using the
-/// information from the IV's range. Returns true if anything changed, false
-/// otherwise.
-bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
- Value *IVOperand) {
- using namespace llvm::PatternMatch;
-
- if (BO->getOpcode() == Instruction::Shl) {
- bool Changed = false;
- ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
- for (auto *U : BO->users()) {
- const APInt *C;
- if (match(U,
- m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
- match(U,
- m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
- BinaryOperator *Shr = cast<BinaryOperator>(U);
- if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
- Shr->setIsExact(true);
- Changed = true;
- }
- }
- }
- return Changed;
- }
-
- return false;
-}
-
-/// Add all uses of Def to the current IV's worklist.
-static void pushIVUsers(
- Instruction *Def, Loop *L,
- SmallPtrSet<Instruction*,16> &Simplified,
- SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
-
- for (User *U : Def->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Avoid infinite or exponential worklist processing.
- // Also ensure unique worklist users.
- // If Def is a LoopPhi, it may not be in the Simplified set, so check for
- // self edges first.
- if (UI == Def)
- continue;
-
- // Only change the current Loop, do not change the other parts (e.g. other
- // Loops).
- if (!L->contains(UI))
- continue;
-
- // Do not push the same instruction more than once.
- if (!Simplified.insert(UI).second)
- continue;
-
- SimpleIVUsers.push_back(std::make_pair(UI, Def));
- }
-}
-
-/// Return true if this instruction generates a simple SCEV
-/// expression in terms of that IV.
-///
-/// This is similar to IVUsers' isInteresting() but processes each instruction
-/// non-recursively when the operand is already known to be a simpleIVUser.
-///
-static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
- if (!SE->isSCEVable(I->getType()))
- return false;
-
- // Get the symbolic expression for this instruction.
- const SCEV *S = SE->getSCEV(I);
-
- // Only consider affine recurrences.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (AR && AR->getLoop() == L)
- return true;
-
- return false;
-}
-
-/// Iteratively perform simplification on a worklist of users
-/// of the specified induction variable. Each successive simplification may push
-/// more users which may themselves be candidates for simplification.
-///
-/// This algorithm does not require IVUsers analysis. Instead, it simplifies
-/// instructions in-place during analysis. Rather than rewriting induction
-/// variables bottom-up from their users, it transforms a chain of IVUsers
-/// top-down, updating the IR only when it encounters a clear optimization
-/// opportunity.
-///
-/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
-///
-void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
- if (!SE->isSCEVable(CurrIV->getType()))
- return;
-
- // Instructions processed by SimplifyIndvar for CurrIV.
- SmallPtrSet<Instruction*,16> Simplified;
-
- // Use-def pairs if IV users waiting to be processed for CurrIV.
- SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
-
- // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
- // called multiple times for the same LoopPhi. This is the proper thing to
- // do for loop header phis that use each other.
- pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
-
- while (!SimpleIVUsers.empty()) {
- std::pair<Instruction*, Instruction*> UseOper =
- SimpleIVUsers.pop_back_val();
- Instruction *UseInst = UseOper.first;
-
- // If a user of the IndVar is trivially dead, we prefer just to mark it dead
- // rather than try to do some complex analysis or transformation (such as
- // widening) basing on it.
- // TODO: Propagate TLI and pass it here to handle more cases.
- if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
- DeadInsts.emplace_back(UseInst);
- continue;
- }
-
- // Bypass back edges to avoid extra work.
- if (UseInst == CurrIV) continue;
-
- // Try to replace UseInst with a loop invariant before any other
- // simplifications.
- if (replaceIVUserWithLoopInvariant(UseInst))
- continue;
-
- Instruction *IVOperand = UseOper.second;
- for (unsigned N = 0; IVOperand; ++N) {
- assert(N <= Simplified.size() && "runaway iteration");
-
- Value *NewOper = foldIVUser(UseInst, IVOperand);
- if (!NewOper)
- break; // done folding
- IVOperand = dyn_cast<Instruction>(NewOper);
- }
- if (!IVOperand)
- continue;
-
- if (eliminateIVUser(UseInst, IVOperand)) {
- pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
- continue;
- }
-
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
- if ((isa<OverflowingBinaryOperator>(BO) &&
- strengthenOverflowingOperation(BO, IVOperand)) ||
- (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
- // re-queue uses of the now modified binary operator and fall
- // through to the checks that remain.
- pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
- }
- }
-
- CastInst *Cast = dyn_cast<CastInst>(UseInst);
- if (V && Cast) {
- V->visitCast(Cast);
- continue;
- }
- if (isSimpleIVUser(UseInst, L, SE)) {
- pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
- }
- }
-}
-
-namespace llvm {
-
-void IVVisitor::anchor() { }
-
-/// Simplify instructions that use this induction variable
-/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
- SCEVExpander &Rewriter, IVVisitor *V) {
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter,
- Dead);
- SIV.simplifyUsers(CurrIV, V);
- return SIV.hasChanged();
-}
-
-/// Simplify users of induction variables within this
-/// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
- SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
-#ifndef NDEBUG
- Rewriter.setDebugType(DEBUG_TYPE);
-#endif
- bool Changed = false;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter);
- }
- return Changed;
-}
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
deleted file mode 100644
index e938ae6cb42f..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ /dev/null
@@ -1,3159 +0,0 @@
-//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the library calls simplifier. It does not implement
-// any pass, but can't be used by other passes to do simplifications.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/SizeOpts.h"
-
-using namespace llvm;
-using namespace PatternMatch;
-
-static cl::opt<bool>
- EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
- cl::init(false),
- cl::desc("Enable unsafe double to float "
- "shrinking for math lib calls"));
-
-
-//===----------------------------------------------------------------------===//
-// Helper Functions
-//===----------------------------------------------------------------------===//
-
-static bool ignoreCallingConv(LibFunc Func) {
- return Func == LibFunc_abs || Func == LibFunc_labs ||
- Func == LibFunc_llabs || Func == LibFunc_strlen;
-}
-
-static bool isCallingConvCCompatible(CallInst *CI) {
- switch(CI->getCallingConv()) {
- default:
- return false;
- case llvm::CallingConv::C:
- return true;
- case llvm::CallingConv::ARM_APCS:
- case llvm::CallingConv::ARM_AAPCS:
- case llvm::CallingConv::ARM_AAPCS_VFP: {
-
- // The iOS ABI diverges from the standard in some cases, so for now don't
- // try to simplify those calls.
- if (Triple(CI->getModule()->getTargetTriple()).isiOS())
- return false;
-
- auto *FuncTy = CI->getFunctionType();
-
- if (!FuncTy->getReturnType()->isPointerTy() &&
- !FuncTy->getReturnType()->isIntegerTy() &&
- !FuncTy->getReturnType()->isVoidTy())
- return false;
-
- for (auto Param : FuncTy->params()) {
- if (!Param->isPointerTy() && !Param->isIntegerTy())
- return false;
- }
- return true;
- }
- }
- return false;
-}
-
-/// Return true if it is only used in equality comparisons with With.
-static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
- for (User *U : V->users()) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (IC->isEquality() && IC->getOperand(1) == With)
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
-static bool callHasFloatingPointArgument(const CallInst *CI) {
- return any_of(CI->operands(), [](const Use &OI) {
- return OI->getType()->isFloatingPointTy();
- });
-}
-
-static bool callHasFP128Argument(const CallInst *CI) {
- return any_of(CI->operands(), [](const Use &OI) {
- return OI->getType()->isFP128Ty();
- });
-}
-
-static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
- if (Base < 2 || Base > 36)
- // handle special zero base
- if (Base != 0)
- return nullptr;
-
- char *End;
- std::string nptr = Str.str();
- errno = 0;
- long long int Result = strtoll(nptr.c_str(), &End, Base);
- if (errno)
- return nullptr;
-
- // if we assume all possible target locales are ASCII supersets,
- // then if strtoll successfully parses a number on the host,
- // it will also successfully parse the same way on the target
- if (*End != '\0')
- return nullptr;
-
- if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
- return nullptr;
-
- return ConstantInt::get(CI->getType(), Result);
-}
-
-static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- CallInst *FOpen = dyn_cast<CallInst>(File);
- if (!FOpen)
- return false;
-
- Function *InnerCallee = FOpen->getCalledFunction();
- if (!InnerCallee)
- return false;
-
- LibFunc Func;
- if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
- Func != LibFunc_fopen)
- return false;
-
- inferLibFuncAttributes(*CI->getCalledFunction(), *TLI);
- if (PointerMayBeCaptured(File, true, true))
- return false;
-
- return true;
-}
-
-static bool isOnlyUsedInComparisonWithZero(Value *V) {
- for (User *U : V->users()) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
-static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
- const DataLayout &DL) {
- if (!isOnlyUsedInComparisonWithZero(CI))
- return false;
-
- if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
- return false;
-
- if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
- return false;
-
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// String and Memory Library Call Optimizations
-//===----------------------------------------------------------------------===//
-
-Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
- --Len; // Unbias length.
-
- // Handle the simple, do-nothing case: strcat(x, "") -> x
- if (Len == 0)
- return Dst;
-
- return emitStrLenMemCpy(Src, Dst, Len, B);
-}
-
-Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
- IRBuilder<> &B) {
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = emitStrLen(Dst, B, DL, TLI);
- if (!DstLen)
- return nullptr;
-
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, 1, Src, 1,
- ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
- return Dst;
-}
-
-Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
- // Extract some information from the instruction.
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- uint64_t Len;
-
- // We don't do anything if length is not constant.
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
- return nullptr;
- --SrcLen; // Unbias length.
-
- // Handle the simple, do-nothing cases:
- // strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0)
- return Dst;
-
- // We don't optimize this case.
- if (Len < SrcLen)
- return nullptr;
-
- // strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further.
- return emitStrLenMemCpy(Src, Dst, SrcLen, B);
-}
-
-Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- Value *SrcStr = CI->getArgOperand(0);
-
- // If the second operand is non-constant, see if we can compute the length
- // of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (!CharC) {
- uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
- return nullptr;
-
- return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
- B, DL, TLI);
- }
-
- // Otherwise, the character is a constant, see if the first argument is
- // a string literal. If so, we can constant fold.
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
- "strchr");
- return nullptr;
- }
-
- // Compute the offset, make sure to handle the case when we're searching for
- // zero (a weird way to spell strlen).
- size_t I = (0xFF & CharC->getSExtValue()) == 0
- ? Str.size()
- : Str.find(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
-
- // strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
-}
-
-Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return nullptr;
-
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- // strrchr(s, 0) -> strchr(s, 0)
- if (CharC->isZero())
- return emitStrChr(SrcStr, '\0', B, TLI);
- return nullptr;
- }
-
- // Compute the offset.
- size_t I = (0xFF & CharC->getSExtValue()) == 0
- ? Str.size()
- : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
-
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
-}
-
-Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(), Str1.compare(Str2));
-
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(B.CreateZExt(
- B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
- CI->getType());
-
- // strcmp(P, "x") -> memcmp(P, "x", 2)
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 && Len2) {
- return emitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- std::min(Len1, Len2)),
- B, DL, TLI);
- }
-
- // strcmp to memcmp
- if (!HasStr1 && HasStr2) {
- if (canTransformToMemCmp(CI, Str1P, Len2, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
- TLI);
- } else if (HasStr1 && !HasStr2) {
- if (canTransformToMemCmp(CI, Str2P, Len1, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
- TLI);
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- // Get the length argument if it is constant.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Length = LengthArg->getZExtValue();
- else
- return nullptr;
-
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ConstantInt::get(CI->getType(), 0);
-
- if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
-
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
- return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
- }
-
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(B.CreateZExt(
- B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
-
- if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
- CI->getType());
-
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
-
- // strncmp to memcmp
- if (!HasStr1 && HasStr2) {
- Len2 = std::min(Len2, Length);
- if (canTransformToMemCmp(CI, Str1P, Len2, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
- TLI);
- } else if (HasStr1 && !HasStr2) {
- Len1 = std::min(Len1, Length);
- if (canTransformToMemCmp(CI, Str2P, Len1, DL))
- return emitMemCmp(
- Str1P, Str2P,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
- TLI);
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // strcpy(x,x) -> x
- return Src;
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
-
- // We have enough information to now generate the memcpy call to do the
- // copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
- return Dst;
-}
-
-Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = emitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
- }
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
-
- Type *PT = Callee->getFunctionType()->getParamType(0);
- Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
- Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
- ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
-
- // We have enough information to now generate the memcpy call to do the
- // copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, 1, Src, 1, LenV);
- return DstEnd;
-}
-
-Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- Value *LenOp = CI->getArgOperand(2);
-
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0)
- return nullptr;
- --SrcLen;
-
- if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
- return Dst;
- }
-
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
-
- if (Len == 0)
- return Dst; // strncpy(x, y, 0) -> x
-
- // Let strncpy handle the zero padding
- if (Len > SrcLen + 1)
- return nullptr;
-
- Type *PT = Callee->getFunctionType()->getParamType(0);
- // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
- B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
-
- return Dst;
-}
-
-Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
- unsigned CharSize) {
- Value *Src = CI->getArgOperand(0);
-
- // Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src, CharSize))
- return ConstantInt::get(CI->getType(), Len - 1);
-
- // If s is a constant pointer pointing to a string literal, we can fold
- // strlen(s + x) to strlen(s) - x, when x is known to be in the range
- // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
- // We only try to simplify strlen when the pointer s points to an array
- // of i8. Otherwise, we would need to scale the offset x before doing the
- // subtraction. This will make the optimization more complex, and it's not
- // very useful because calling strlen for a pointer of other types is
- // very uncommon.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
- if (!isGEPBasedOnPointerToString(GEP, CharSize))
- return nullptr;
-
- ConstantDataArraySlice Slice;
- if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
- uint64_t NullTermIdx;
- if (Slice.Array == nullptr) {
- NullTermIdx = 0;
- } else {
- NullTermIdx = ~((uint64_t)0);
- for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
- if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
- NullTermIdx = I;
- break;
- }
- }
- // If the string does not have '\0', leave it to strlen to compute
- // its length.
- if (NullTermIdx == ~((uint64_t)0))
- return nullptr;
- }
-
- Value *Offset = GEP->getOperand(2);
- KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
- Known.Zero.flipAllBits();
- uint64_t ArrSize =
- cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
-
- // KnownZero's bits are flipped, so zeros in KnownZero now represent
- // bits known to be zeros in Offset, and ones in KnowZero represent
- // bits unknown in Offset. Therefore, Offset is known to be in range
- // [0, NullTermIdx] when the flipped KnownZero is non-negative and
- // unsigned-less-than NullTermIdx.
- //
- // If Offset is not provably in the range [0, NullTermIdx], we can still
- // optimize if we can prove that the program has undefined behavior when
- // Offset is outside that range. That is the case when GEP->getOperand(0)
- // is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
- (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
- NullTermIdx == ArrSize - 1)) {
- Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
- return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
- Offset);
- }
- }
-
- return nullptr;
- }
-
- // strlen(x?"foo":"bars") --> x ? 3 : 4
- if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
- uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
- if (LenTrue && LenFalse) {
- ORE.emit([&]() {
- return OptimizationRemark("instcombine", "simplify-libcalls", CI)
- << "folded strlen(select) to select of constants";
- });
- return B.CreateSelect(SI->getCondition(),
- ConstantInt::get(CI->getType(), LenTrue - 1),
- ConstantInt::get(CI->getType(), LenFalse - 1));
- }
- }
-
- // strlen(x) != 0 --> *x != 0
- // strlen(x) == 0 --> *x == 0
- if (isOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
- CI->getType());
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
- return optimizeStringLength(CI, B, 8);
-}
-
-Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
- Module &M = *CI->getModule();
- unsigned WCharSize = TLI->getWCharSize(M) * 8;
- // We cannot perform this optimization without wchar_size metadata.
- if (WCharSize == 0)
- return nullptr;
-
- return optimizeStringLength(CI, B, WCharSize);
-}
-
-Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strpbrk(s, "") -> nullptr
- // strpbrk("", s) -> nullptr
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t I = S1.find_first_of(S2);
- if (I == StringRef::npos) // No match.
- return Constant::getNullValue(CI->getType());
-
- return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
- "strpbrk");
- }
-
- // strpbrk(s, "a") -> strchr(s, 'a')
- if (HasS2 && S2.size() == 1)
- return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
- Value *EndPtr = CI->getArgOperand(1);
- if (isa<ConstantPointerNull>(EndPtr)) {
- // With a null EndPtr, this function won't capture the main argument.
- // It would be readonly too, except that it still may write to errno.
- CI->addParamAttr(0, Attribute::NoCapture);
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strspn(s, "") -> 0
- // strspn("", s) -> 0
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_not_of(S2);
- if (Pos == StringRef::npos)
- Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
- // strcspn("", s) -> 0
- if (HasS1 && S1.empty())
- return Constant::getNullValue(CI->getType());
-
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_of(S2);
- if (Pos == StringRef::npos)
- Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
-
- // strcspn(s, "") -> strlen(s)
- if (HasS2 && S2.empty())
- return emitStrLen(CI->getArgOperand(0), B, DL, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
- // fold strstr(x, x) -> x.
- if (CI->getArgOperand(0) == CI->getArgOperand(1))
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
-
- // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI);
- if (!StrLen)
- return nullptr;
- Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, DL, TLI);
- if (!StrNCmp)
- return nullptr;
- for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
- ICmpInst *Old = cast<ICmpInst>(*UI++);
- Value *Cmp =
- B.CreateICmp(Old->getPredicate(), StrNCmp,
- ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
- replaceAllUsesWith(Old, Cmp);
- }
- return CI;
- }
-
- // See if either input string is a constant string.
- StringRef SearchStr, ToFindStr;
- bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
- bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
-
- // fold strstr(x, "") -> x.
- if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
-
- // If both strings are known, constant fold it.
- if (HasStr1 && HasStr2) {
- size_t Offset = SearchStr.find(ToFindStr);
-
- if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
- return Constant::getNullValue(CI->getType());
-
- // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = castToCStr(CI->getArgOperand(0), B);
- Result =
- B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
- return B.CreateBitCast(Result, CI->getType());
- }
-
- // fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-
- // memchr(x, y, 0) -> null
- if (LenC && LenC->isZero())
- return Constant::getNullValue(CI->getType());
-
- // From now on we need at least constant length and string.
- StringRef Str;
- if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
- return nullptr;
-
- // Truncate the string to LenC. If Str is smaller than LenC we will still only
- // scan the string, as reading past the end of it is undefined and we can just
- // return null if we don't find the char.
- Str = Str.substr(0, LenC->getZExtValue());
-
- // If the char is variable but the input str and length are not we can turn
- // this memchr call into a simple bit field test. Of course this only works
- // when the return value is only checked against null.
- //
- // It would be really nice to reuse switch lowering here but we can't change
- // the CFG at this point.
- //
- // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
- // != 0
- // after bounds check.
- if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
- unsigned char Max =
- *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
- reinterpret_cast<const unsigned char *>(Str.end()));
-
- // Make sure the bit field we're about to create fits in a register on the
- // target.
- // FIXME: On a 64 bit architecture this prevents us from using the
- // interesting range of alpha ascii chars. We could do better by emitting
- // two bitfields or shifting the range by 64 if no lower chars are used.
- if (!DL.fitsInLegalInteger(Max + 1))
- return nullptr;
-
- // For the bit field use a power-of-2 type with at least 8 bits to avoid
- // creating unnecessary illegal types.
- unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
-
- // Now build the bit field.
- APInt Bitfield(Width, 0);
- for (char C : Str)
- Bitfield.setBit((unsigned char)C);
- Value *BitfieldC = B.getInt(Bitfield);
-
- // Adjust width of "C" to the bitfield width, then mask off the high bits.
- Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
- C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
-
- // First check that the bit field access is within bounds.
- Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
- "memchr.bounds");
-
- // Create code that checks if the given bit is set in the field.
- Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
- Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
-
- // Finally merge both checks and cast to pointer type. The inttoptr
- // implicitly zexts the i1 to intptr type.
- return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
- }
-
- // Check if all arguments are constants. If so, we can constant fold.
- if (!CharC)
- return nullptr;
-
- // Compute the offset.
- size_t I = Str.find(CharC->getSExtValue() & 0xFF);
- if (I == StringRef::npos) // Didn't find the char. memchr returns null.
- return Constant::getNullValue(CI->getType());
-
- // memchr(s+n,c,l) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
-}
-
-static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
- uint64_t Len, IRBuilder<> &B,
- const DataLayout &DL) {
- if (Len == 0) // memcmp(s1,s2,0) -> 0
- return Constant::getNullValue(CI->getType());
-
- // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
- if (Len == 1) {
- Value *LHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV =
- B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
- return B.CreateSub(LHSV, RHSV, "chardiff");
- }
-
- // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
- // TODO: The case where both inputs are constants does not need to be limited
- // to legal integers or equality comparison. See block below this.
- if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
- IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
- unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
-
- // First, see if we can fold either argument to a constant.
- Value *LHSV = nullptr;
- if (auto *LHSC = dyn_cast<Constant>(LHS)) {
- LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
- LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
- }
- Value *RHSV = nullptr;
- if (auto *RHSC = dyn_cast<Constant>(RHS)) {
- RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
- RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
- }
-
- // Don't generate unaligned loads. If either source is constant data,
- // alignment doesn't matter for that source because there is no load.
- if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
- (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
- if (!LHSV) {
- Type *LHSPtrTy =
- IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
- LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
- }
- if (!RHSV) {
- Type *RHSPtrTy =
- IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
- }
- return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
- }
- }
-
- // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
- // TODO: This is limited to i8 arrays.
- StringRef LHSStr, RHSStr;
- if (getConstantStringInfo(LHS, LHSStr) &&
- getConstantStringInfo(RHS, RHSStr)) {
- // Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.size() || Len > RHSStr.size())
- return nullptr;
- // Fold the memcmp and normalize the result. This way we get consistent
- // results across multiple platforms.
- uint64_t Ret = 0;
- int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
- if (Cmp < 0)
- Ret = -1;
- else if (Cmp > 0)
- Ret = 1;
- return ConstantInt::get(CI->getType(), Ret);
- }
- return nullptr;
-}
-
-// Most simplifications for memcmp also apply to bcmp.
-Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
- IRBuilder<> &B) {
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
- Value *Size = CI->getArgOperand(2);
-
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
- // Handle constant lengths.
- if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
- if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
- LenC->getZExtValue(), B, DL))
- return Res;
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
- if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
- return V;
-
- // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
- // `bcmp` can be more efficient than memcmp because it only has to know that
- // there is a difference, not where it is.
- if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
- Value *LHS = CI->getArgOperand(0);
- Value *RHS = CI->getArgOperand(1);
- Value *Size = CI->getArgOperand(2);
- return emitBCmp(LHS, RHS, Size, B, DL, TLI);
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
- return optimizeMemCmpBCmpCommon(CI, B);
-}
-
-Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
- // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
- return CI->getArgOperand(0);
-}
-
-Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
- // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
- return CI->getArgOperand(0);
-}
-
-/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
- // This has to be a memset of zeros (bzero).
- auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
- if (!FillValue || FillValue->getZExtValue() != 0)
- return nullptr;
-
- // TODO: We should handle the case where the malloc has more than one use.
- // This is necessary to optimize common patterns such as when the result of
- // the malloc is checked against null or when a memset intrinsic is used in
- // place of a memset library call.
- auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
- if (!Malloc || !Malloc->hasOneUse())
- return nullptr;
-
- // Is the inner call really malloc()?
- Function *InnerCallee = Malloc->getCalledFunction();
- if (!InnerCallee)
- return nullptr;
-
- LibFunc Func;
- if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
- Func != LibFunc_malloc)
- return nullptr;
-
- // The memset must cover the same number of bytes that are malloc'd.
- if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
- return nullptr;
-
- // Replace the malloc with a calloc. We need the data layout to know what the
- // actual size of a 'size_t' parameter is.
- B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
- const DataLayout &DL = Malloc->getModule()->getDataLayout();
- IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
- Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
- Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, *TLI);
- if (!Calloc)
- return nullptr;
-
- Malloc->replaceAllUsesWith(Calloc);
- eraseFromParent(Malloc);
-
- return Calloc;
-}
-
-Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
- if (auto *Calloc = foldMallocMemset(CI, B))
- return Calloc;
-
- // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
-}
-
-Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
- if (isa<ConstantPointerNull>(CI->getArgOperand(0)))
- return emitMalloc(CI->getArgOperand(1), B, DL, TLI);
-
- return nullptr;
-}
-
-//===----------------------------------------------------------------------===//
-// Math Library Optimizations
-//===----------------------------------------------------------------------===//
-
-// Replace a libcall \p CI with a call to intrinsic \p IID
-static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- Module *M = CI->getModule();
- Value *V = CI->getArgOperand(0);
- Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
- CallInst *NewCall = B.CreateCall(F, V);
- NewCall->takeName(CI);
- return NewCall;
-}
-
-/// Return a variant of Val with float type.
-/// Currently this works in two cases: If Val is an FPExtension of a float
-/// value to something bigger, simply return the operand.
-/// If Val is a ConstantFP but can be converted to a float ConstantFP without
-/// loss of precision do so.
-static Value *valueHasFloatPrecision(Value *Val) {
- if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
- Value *Op = Cast->getOperand(0);
- if (Op->getType()->isFloatTy())
- return Op;
- }
- if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
- APFloat F = Const->getValueAPF();
- bool losesInfo;
- (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
- &losesInfo);
- if (!losesInfo)
- return ConstantFP::get(Const->getContext(), F);
- }
- return nullptr;
-}
-
-/// Shrink double -> float functions.
-static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool isBinary, bool isPrecise = false) {
- Function *CalleeFn = CI->getCalledFunction();
- if (!CI->getType()->isDoubleTy() || !CalleeFn)
- return nullptr;
-
- // If not all the uses of the function are converted to float, then bail out.
- // This matters if the precision of the result is more important than the
- // precision of the arguments.
- if (isPrecise)
- for (User *U : CI->users()) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (!Cast || !Cast->getType()->isFloatTy())
- return nullptr;
- }
-
- // If this is something like 'g((double) float)', convert to 'gf(float)'.
- Value *V[2];
- V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
- V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
- if (!V[0] || (isBinary && !V[1]))
- return nullptr;
-
- StringRef CalleeNm = CalleeFn->getName();
- AttributeList CalleeAt = CalleeFn->getAttributes();
- bool CalleeIn = CalleeFn->isIntrinsic();
-
- // If call isn't an intrinsic, check that it isn't within a function with the
- // same name as the float version of this call, otherwise the result is an
- // infinite loop. For example, from MinGW-w64:
- //
- // float expf(float val) { return (float) exp((double) val); }
- if (!CalleeIn) {
- const Function *Fn = CI->getFunction();
- StringRef FnName = Fn->getName();
- if (FnName.back() == 'f' &&
- FnName.size() == (CalleeNm.size() + 1) &&
- FnName.startswith(CalleeNm))
- return nullptr;
- }
-
- // Propagate the math semantics from the current function to the new function.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // g((double) float) -> (double) gf(float)
- Value *R;
- if (CalleeIn) {
- Module *M = CI->getModule();
- Intrinsic::ID IID = CalleeFn->getIntrinsicID();
- Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
- R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
- }
- else
- R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
- : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
-
- return B.CreateFPExt(R, B.getDoubleTy());
-}
-
-/// Shrink double -> float for unary functions.
-static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool isPrecise = false) {
- return optimizeDoubleFP(CI, B, false, isPrecise);
-}
-
-/// Shrink double -> float for binary functions.
-static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool isPrecise = false) {
- return optimizeDoubleFP(CI, B, true, isPrecise);
-}
-
-// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
-Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
- if (!CI->isFast())
- return nullptr;
-
- // Propagate fast-math flags from the existing call to new instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- Value *Real, *Imag;
- if (CI->getNumArgOperands() == 1) {
- Value *Op = CI->getArgOperand(0);
- assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
- Real = B.CreateExtractValue(Op, 0, "real");
- Imag = B.CreateExtractValue(Op, 1, "imag");
- } else {
- assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
- Real = CI->getArgOperand(0);
- Imag = CI->getArgOperand(1);
- }
-
- Value *RealReal = B.CreateFMul(Real, Real);
- Value *ImagImag = B.CreateFMul(Imag, Imag);
-
- Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt,
- CI->getType());
- return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
-}
-
-static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
- IRBuilder<> &B) {
- if (!isa<FPMathOperator>(Call))
- return nullptr;
-
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Call->getFastMathFlags());
-
- // TODO: Can this be shared to also handle LLVM intrinsics?
- Value *X;
- switch (Func) {
- case LibFunc_sin:
- case LibFunc_sinf:
- case LibFunc_sinl:
- case LibFunc_tan:
- case LibFunc_tanf:
- case LibFunc_tanl:
- // sin(-X) --> -sin(X)
- // tan(-X) --> -tan(X)
- if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
- return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
- break;
- case LibFunc_cos:
- case LibFunc_cosf:
- case LibFunc_cosl:
- // cos(-X) --> cos(X)
- if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
- return B.CreateCall(Call->getCalledFunction(), X, "cos");
- break;
- default:
- break;
- }
- return nullptr;
-}
-
-static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
- // Multiplications calculated using Addition Chains.
- // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
-
- assert(Exp != 0 && "Incorrect exponent 0 not handled");
-
- if (InnerChain[Exp])
- return InnerChain[Exp];
-
- static const unsigned AddChain[33][2] = {
- {0, 0}, // Unused.
- {0, 0}, // Unused (base case = pow1).
- {1, 1}, // Unused (pre-computed).
- {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
- {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
- {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
- {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
- {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
- };
-
- InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
- getPow(InnerChain, AddChain[Exp][1], B));
- return InnerChain[Exp];
-}
-
-/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
-/// exp2(log2(n) * x) for pow(n, x).
-Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
- Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
- AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
- Module *Mod = Pow->getModule();
- Type *Ty = Pow->getType();
- bool Ignored;
-
- // Evaluate special cases related to a nested function as the base.
-
- // pow(exp(x), y) -> exp(x * y)
- // pow(exp2(x), y) -> exp2(x * y)
- // If exp{,2}() is used only once, it is better to fold two transcendental
- // math functions into one. If used again, exp{,2}() would still have to be
- // called with the original argument, then keep both original transcendental
- // functions. However, this transformation is only safe with fully relaxed
- // math semantics, since, besides rounding differences, it changes overflow
- // and underflow behavior quite dramatically. For example:
- // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
- // Whereas:
- // exp(1000 * 0.001) = exp(1)
- // TODO: Loosen the requirement for fully relaxed math semantics.
- // TODO: Handle exp10() when more targets have it available.
- CallInst *BaseFn = dyn_cast<CallInst>(Base);
- if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
- LibFunc LibFn;
-
- Function *CalleeFn = BaseFn->getCalledFunction();
- if (CalleeFn &&
- TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
- StringRef ExpName;
- Intrinsic::ID ID;
- Value *ExpFn;
- LibFunc LibFnFloat;
- LibFunc LibFnDouble;
- LibFunc LibFnLongDouble;
-
- switch (LibFn) {
- default:
- return nullptr;
- case LibFunc_expf: case LibFunc_exp: case LibFunc_expl:
- ExpName = TLI->getName(LibFunc_exp);
- ID = Intrinsic::exp;
- LibFnFloat = LibFunc_expf;
- LibFnDouble = LibFunc_exp;
- LibFnLongDouble = LibFunc_expl;
- break;
- case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l:
- ExpName = TLI->getName(LibFunc_exp2);
- ID = Intrinsic::exp2;
- LibFnFloat = LibFunc_exp2f;
- LibFnDouble = LibFunc_exp2;
- LibFnLongDouble = LibFunc_exp2l;
- break;
- }
-
- // Create new exp{,2}() with the product as its argument.
- Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
- ExpFn = BaseFn->doesNotAccessMemory()
- ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
- FMul, ExpName)
- : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
- LibFnLongDouble, B,
- BaseFn->getAttributes());
-
- // Since the new exp{,2}() is different from the original one, dead code
- // elimination cannot be trusted to remove it, since it may have side
- // effects (e.g., errno). When the only consumer for the original
- // exp{,2}() is pow(), then it has to be explicitly erased.
- BaseFn->replaceAllUsesWith(ExpFn);
- eraseFromParent(BaseFn);
-
- return ExpFn;
- }
- }
-
- // Evaluate special cases related to a constant base.
-
- const APFloat *BaseF;
- if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
- return nullptr;
-
- // pow(2.0 ** n, x) -> exp2(n * x)
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
- APFloat BaseR = APFloat(1.0);
- BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
- BaseR = BaseR / *BaseF;
- bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
- const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
- APSInt NI(64, false);
- if ((IsInteger || IsReciprocal) &&
- NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
- APFloat::opOK &&
- NI > 1 && NI.isPowerOf2()) {
- double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
- Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
- if (Pow->doesNotAccessMemory())
- return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
- FMul, "exp2");
- else
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
- }
- }
-
- // pow(10.0, x) -> exp10(x)
- // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
- if (match(Base, m_SpecificFP(10.0)) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
- return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
- LibFunc_exp10l, B, Attrs);
-
- // pow(n, x) -> exp2(log2(n) * x)
- if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() &&
- Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) {
- Value *Log = nullptr;
- if (Ty->isFloatTy())
- Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
- else if (Ty->isDoubleTy())
- Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
-
- if (Log) {
- Value *FMul = B.CreateFMul(Log, Expo, "mul");
- if (Pow->doesNotAccessMemory()) {
- return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
- FMul, "exp2");
- } else {
- if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l))
- return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
- LibFunc_exp2l, B, Attrs);
- }
- }
- }
- return nullptr;
-}
-
-static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
- Module *M, IRBuilder<> &B,
- const TargetLibraryInfo *TLI) {
- // If errno is never set, then use the intrinsic for sqrt().
- if (NoErrno) {
- Function *SqrtFn =
- Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
- return B.CreateCall(SqrtFn, V, "sqrt");
- }
-
- // Otherwise, use the libcall for sqrt().
- if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl))
- // TODO: We also should check that the target can in fact lower the sqrt()
- // libcall. We currently have no way to ask this question, so we ask if
- // the target has a sqrt() libcall, which is not exactly the same.
- return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
- LibFunc_sqrtl, B, Attrs);
-
- return nullptr;
-}
-
-/// Use square root in place of pow(x, +/-0.5).
-Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
- Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
- AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
- Module *Mod = Pow->getModule();
- Type *Ty = Pow->getType();
-
- const APFloat *ExpoF;
- if (!match(Expo, m_APFloat(ExpoF)) ||
- (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
- return nullptr;
-
- Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
- if (!Sqrt)
- return nullptr;
-
- // Handle signed zero base by expanding to fabs(sqrt(x)).
- if (!Pow->hasNoSignedZeros()) {
- Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
- Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
- }
-
- // Handle non finite base by expanding to
- // (x == -infinity ? +infinity : sqrt(x)).
- if (!Pow->hasNoInfs()) {
- Value *PosInf = ConstantFP::getInfinity(Ty),
- *NegInf = ConstantFP::getInfinity(Ty, true);
- Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
- Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
- }
-
- // If the exponent is negative, then get the reciprocal.
- if (ExpoF->isNegative())
- Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
-
- return Sqrt;
-}
-
-static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
- IRBuilder<> &B) {
- Value *Args[] = {Base, Expo};
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
- return B.CreateCall(F, Args);
-}
-
-Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
- Value *Base = Pow->getArgOperand(0);
- Value *Expo = Pow->getArgOperand(1);
- Function *Callee = Pow->getCalledFunction();
- StringRef Name = Callee->getName();
- Type *Ty = Pow->getType();
- Module *M = Pow->getModule();
- Value *Shrunk = nullptr;
- bool AllowApprox = Pow->hasApproxFunc();
- bool Ignored;
-
- // Bail out if simplifying libcalls to pow() is disabled.
- if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
- return nullptr;
-
- // Propagate the math semantics from the call to any created instructions.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
-
- // Shrink pow() to powf() if the arguments are single precision,
- // unless the result is expected to be double precision.
- if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
- hasFloatVersion(Name))
- Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
-
- // Evaluate special cases related to the base.
-
- // pow(1.0, x) -> 1.0
- if (match(Base, m_FPOne()))
- return Base;
-
- if (Value *Exp = replacePowWithExp(Pow, B))
- return Exp;
-
- // Evaluate special cases related to the exponent.
-
- // pow(x, -1.0) -> 1.0 / x
- if (match(Expo, m_SpecificFP(-1.0)))
- return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
-
- // pow(x, +/-0.0) -> 1.0
- if (match(Expo, m_AnyZeroFP()))
- return ConstantFP::get(Ty, 1.0);
-
- // pow(x, 1.0) -> x
- if (match(Expo, m_FPOne()))
- return Base;
-
- // pow(x, 2.0) -> x * x
- if (match(Expo, m_SpecificFP(2.0)))
- return B.CreateFMul(Base, Base, "square");
-
- if (Value *Sqrt = replacePowWithSqrt(Pow, B))
- return Sqrt;
-
- // pow(x, n) -> x * x * x * ...
- const APFloat *ExpoF;
- if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
- // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
- // If the exponent is an integer+0.5 we generate a call to sqrt and an
- // additional fmul.
- // TODO: This whole transformation should be backend specific (e.g. some
- // backends might prefer libcalls or the limit for the exponent might
- // be different) and it should also consider optimizing for size.
- APFloat LimF(ExpoF->getSemantics(), 33.0),
- ExpoA(abs(*ExpoF));
- if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
- // This transformation applies to integer or integer+0.5 exponents only.
- // For integer+0.5, we create a sqrt(Base) call.
- Value *Sqrt = nullptr;
- if (!ExpoA.isInteger()) {
- APFloat Expo2 = ExpoA;
- // To check if ExpoA is an integer + 0.5, we add it to itself. If there
- // is no floating point exception and the result is an integer, then
- // ExpoA == integer + 0.5
- if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
- return nullptr;
-
- if (!Expo2.isInteger())
- return nullptr;
-
- Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
- Pow->doesNotAccessMemory(), M, B, TLI);
- }
-
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
-
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
-
- // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
- if (Sqrt)
- FMul = B.CreateFMul(FMul, Sqrt);
-
- // If the exponent is negative, then get the reciprocal.
- if (ExpoF->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
-
- return FMul;
- }
-
- APSInt IntExpo(32, /*isUnsigned=*/false);
- // powf(x, n) -> powi(x, n) if n is a constant signed integer value
- if (ExpoF->isInteger() &&
- ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
- APFloat::opOK) {
- return createPowWithIntegerExponent(
- Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
- }
- }
-
- // powf(x, itofp(y)) -> powi(x, y)
- if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
- Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
- Value *NewExpo = nullptr;
- unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
- if (isa<SIToFPInst>(Expo) && BitWidth == 32)
- NewExpo = IntExpo;
- else if (BitWidth < 32)
- NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
- : B.CreateZExt(IntExpo, B.getInt32Ty());
- if (NewExpo)
- return createPowWithIntegerExponent(Base, NewExpo, M, B);
- }
-
- return Shrunk;
-}
-
-Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- Value *Op = CI->getArgOperand(0);
- // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
- // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc LdExp = LibFunc_ldexpl;
- if (Op->getType()->isFloatTy())
- LdExp = LibFunc_ldexpf;
- else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc_ldexp;
-
- if (TLI->has(LdExp)) {
- Value *LdExpArg = nullptr;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
-
- if (LdExpArg) {
- Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = CI->getModule();
- FunctionCallee NewCallee = M->getOrInsertFunction(
- TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
- CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
- }
- }
- return Ret;
-}
-
-Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
- // If we can shrink the call to a float function rather than a double
- // function, do that first.
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
- if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
- return Ret;
-
- // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
- // the intrinsics for improved optimization (for example, vectorization).
- // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
- // From the C standard draft WG14/N1256:
- // "Ideally, fmax would be sensitive to the sign of zero, for example
- // fmax(-0.0, +0.0) would return +0; however, implementation in software
- // might be impractical."
- IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF = CI->getFastMathFlags();
- FMF.setNoSignedZeros();
- B.setFastMathFlags(FMF);
-
- Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
- : Intrinsic::maxnum;
- Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
- return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
-}
-
-Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- if (!CI->isFast())
- return Ret;
- Value *Op1 = CI->getArgOperand(0);
- auto *OpC = dyn_cast<CallInst>(Op1);
-
- // The earlier call must also be 'fast' in order to do these transforms.
- if (!OpC || !OpC->isFast())
- return Ret;
-
- // log(pow(x,y)) -> y*log(x)
- // This is only applicable to log, log2, log10.
- if (Name != "log" && Name != "log2" && Name != "log10")
- return Ret;
-
- IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setFast();
- B.setFastMathFlags(FMF);
-
- LibFunc Func;
- Function *F = OpC->getCalledFunction();
- if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
- return B.CreateFMul(OpC->getArgOperand(1),
- emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
- Callee->getAttributes()), "mul");
-
- // log(exp2(y)) -> y*log(2)
- if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc_exp2)
- return B.CreateFMul(
- OpC->getArgOperand(0),
- emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
- Callee->getName(), B, Callee->getAttributes()),
- "logmul");
- return Ret;
-}
-
-Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- // TODO: Once we have a way (other than checking for the existince of the
- // libcall) to tell whether our target can lower @llvm.sqrt, relax the
- // condition below.
- if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
- Callee->getIntrinsicID() == Intrinsic::sqrt))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- if (!CI->isFast())
- return Ret;
-
- Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
- if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
- return Ret;
-
- // We're looking for a repeated factor in a multiplication tree,
- // so we can do this fold: sqrt(x * x) -> fabs(x);
- // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
- Value *Op0 = I->getOperand(0);
- Value *Op1 = I->getOperand(1);
- Value *RepeatOp = nullptr;
- Value *OtherOp = nullptr;
- if (Op0 == Op1) {
- // Simple match: the operands of the multiply are identical.
- RepeatOp = Op0;
- } else {
- // Look for a more complicated pattern: one of the operands is itself
- // a multiply, so search for a common factor in that multiply.
- // Note: We don't bother looking any deeper than this first level or for
- // variations of this pattern because instcombine's visitFMUL and/or the
- // reassociation pass should give us this form.
- Value *OtherMul0, *OtherMul1;
- if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
- // Pattern: sqrt((x * y) * z)
- if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) {
- // Matched: sqrt((x * x) * z)
- RepeatOp = OtherMul0;
- OtherOp = Op1;
- }
- }
- }
- if (!RepeatOp)
- return Ret;
-
- // Fast math flags for any created instructions should match the sqrt
- // and multiply.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(I->getFastMathFlags());
-
- // If we found a repeated factor, hoist it out of the square root and
- // replace it with the fabs of that factor.
- Module *M = Callee->getParent();
- Type *ArgType = I->getType();
- Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
- Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
- if (OtherOp) {
- // If we found a non-repeated factor, we still need to get its square
- // root. We then multiply that by the value that was simplified out
- // of the square root calculation.
- Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
- Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
- return B.CreateFMul(FabsCall, SqrtCall);
- }
- return FabsCall;
-}
-
-// TODO: Generalize to handle any trig function and its inverse.
-Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- Value *Op1 = CI->getArgOperand(0);
- auto *OpC = dyn_cast<CallInst>(Op1);
- if (!OpC)
- return Ret;
-
- // Both calls must be 'fast' in order to remove them.
- if (!CI->isFast() || !OpC->isFast())
- return Ret;
-
- // tan(atan(x)) -> x
- // tanf(atanf(x)) -> x
- // tanl(atanl(x)) -> x
- LibFunc Func;
- Function *F = OpC->getCalledFunction();
- if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- ((Func == LibFunc_atan && Callee->getName() == "tan") ||
- (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
- (Func == LibFunc_atanl && Callee->getName() == "tanl")))
- Ret = OpC->getArgOperand(0);
- return Ret;
-}
-
-static bool isTrigLibCall(CallInst *CI) {
- // We can only hope to do anything useful if we can ignore things like errno
- // and floating-point exceptions.
- // We already checked the prototype.
- return CI->hasFnAttr(Attribute::NoUnwind) &&
- CI->hasFnAttr(Attribute::ReadNone);
-}
-
-static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
- bool UseFloat, Value *&Sin, Value *&Cos,
- Value *&SinCos) {
- Type *ArgTy = Arg->getType();
- Type *ResTy;
- StringRef Name;
-
- Triple T(OrigCallee->getParent()->getTargetTriple());
- if (UseFloat) {
- Name = "__sincospif_stret";
-
- assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
- // x86_64 can't use {float, float} since that would be returned in both
- // xmm0 and xmm1, which isn't what a real struct would do.
- ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
- : static_cast<Type *>(StructType::get(ArgTy, ArgTy));
- } else {
- Name = "__sincospi_stret";
- ResTy = StructType::get(ArgTy, ArgTy);
- }
-
- Module *M = OrigCallee->getParent();
- FunctionCallee Callee =
- M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
-
- if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
- // If the argument is an instruction, it must dominate all uses so put our
- // sincos call there.
- B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
- } else {
- // Otherwise (e.g. for a constant) the beginning of the function is as
- // good a place as any.
- BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
- B.SetInsertPoint(&EntryBB, EntryBB.begin());
- }
-
- SinCos = B.CreateCall(Callee, Arg, "sincospi");
-
- if (SinCos->getType()->isStructTy()) {
- Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
- Cos = B.CreateExtractValue(SinCos, 1, "cospi");
- } else {
- Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
- "sinpi");
- Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
- "cospi");
- }
-}
-
-Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
- // Make sure the prototype is as expected, otherwise the rest of the
- // function is probably invalid and likely to abort.
- if (!isTrigLibCall(CI))
- return nullptr;
-
- Value *Arg = CI->getArgOperand(0);
- SmallVector<CallInst *, 1> SinCalls;
- SmallVector<CallInst *, 1> CosCalls;
- SmallVector<CallInst *, 1> SinCosCalls;
-
- bool IsFloat = Arg->getType()->isFloatTy();
-
- // Look for all compatible sinpi, cospi and sincospi calls with the same
- // argument. If there are enough (in some sense) we can make the
- // substitution.
- Function *F = CI->getFunction();
- for (User *U : Arg->users())
- classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
-
- // It's only worthwhile if both sinpi and cospi are actually used.
- if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
- return nullptr;
-
- Value *Sin, *Cos, *SinCos;
- insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
-
- auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
- Value *Res) {
- for (CallInst *C : Calls)
- replaceAllUsesWith(C, Res);
- };
-
- replaceTrigInsts(SinCalls, Sin);
- replaceTrigInsts(CosCalls, Cos);
- replaceTrigInsts(SinCosCalls, SinCos);
-
- return nullptr;
-}
-
-void LibCallSimplifier::classifyArgUse(
- Value *Val, Function *F, bool IsFloat,
- SmallVectorImpl<CallInst *> &SinCalls,
- SmallVectorImpl<CallInst *> &CosCalls,
- SmallVectorImpl<CallInst *> &SinCosCalls) {
- CallInst *CI = dyn_cast<CallInst>(Val);
-
- if (!CI)
- return;
-
- // Don't consider calls in other functions.
- if (CI->getFunction() != F)
- return;
-
- Function *Callee = CI->getCalledFunction();
- LibFunc Func;
- if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
- !isTrigLibCall(CI))
- return;
-
- if (IsFloat) {
- if (Func == LibFunc_sinpif)
- SinCalls.push_back(CI);
- else if (Func == LibFunc_cospif)
- CosCalls.push_back(CI);
- else if (Func == LibFunc_sincospif_stret)
- SinCosCalls.push_back(CI);
- } else {
- if (Func == LibFunc_sinpi)
- SinCalls.push_back(CI);
- else if (Func == LibFunc_cospi)
- CosCalls.push_back(CI);
- else if (Func == LibFunc_sincospi_stret)
- SinCosCalls.push_back(CI);
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Integer Library Call Optimizations
-//===----------------------------------------------------------------------===//
-
-Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
- // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- Value *Op = CI->getArgOperand(0);
- Type *ArgType = Op->getType();
- Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
- V = B.CreateIntCast(V, B.getInt32Ty(), false);
-
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
- return B.CreateSelect(Cond, V, B.getInt32(0));
-}
-
-Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
- // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
- Value *Op = CI->getArgOperand(0);
- Type *ArgType = Op->getType();
- Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::ctlz, ArgType);
- Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
- V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
- V);
- return B.CreateIntCast(V, CI->getType(), false);
-}
-
-Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
- // abs(x) -> x <s 0 ? -x : x
- // The negation has 'nsw' because abs of INT_MIN is undefined.
- Value *X = CI->getArgOperand(0);
- Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
- Value *NegX = B.CreateNSWNeg(X, "neg");
- return B.CreateSelect(IsNeg, NegX, X);
-}
-
-Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
- // isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
- Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
- return B.CreateZExt(Op, CI->getType());
-}
-
-Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
- // isascii(c) -> c <u 128
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
- return B.CreateZExt(Op, CI->getType());
-}
-
-Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
- // toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getArgOperand(0),
- ConstantInt::get(CI->getType(), 0x7F));
-}
-
-Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) {
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return nullptr;
-
- return convertStrToNumber(CI, Str, 10);
-}
-
-Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) {
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return nullptr;
-
- if (!isa<ConstantPointerNull>(CI->getArgOperand(1)))
- return nullptr;
-
- if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
- return convertStrToNumber(CI, Str, CInt->getSExtValue());
- }
-
- return nullptr;
-}
-
-//===----------------------------------------------------------------------===//
-// Formatting and IO Library Call Optimizations
-//===----------------------------------------------------------------------===//
-
-static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
-
-Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
- int StreamArg) {
- Function *Callee = CI->getCalledFunction();
- // Error reporting calls should be cold, mark them as such.
- // This applies even to non-builtin calls: it is only a hint and applies to
- // functions that the frontend might not understand as builtins.
-
- // This heuristic was suggested in:
- // Improving Static Branch Prediction in a Compiler
- // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
- // Proceedings of PACT'98, Oct. 1998, IEEE
- if (!CI->hasFnAttr(Attribute::Cold) &&
- isReportingError(Callee, CI, StreamArg)) {
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
- }
-
- return nullptr;
-}
-
-static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
- if (!Callee || !Callee->isDeclaration())
- return false;
-
- if (StreamArg < 0)
- return true;
-
- // These functions might be considered cold, but only if their stream
- // argument is stderr.
-
- if (StreamArg >= (int)CI->getNumArgOperands())
- return false;
- LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
- if (!LI)
- return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
- if (!GV || !GV->isDeclaration())
- return false;
- return GV->getName() == "stderr";
-}
-
-Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return nullptr;
-
- // Empty format string -> noop.
- if (FormatStr.empty()) // Tolerate printf's declared void.
- return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
-
- // Do not do any of the following transformations if the printf return value
- // is used, in general the printf return value is not compatible with either
- // putchar() or puts().
- if (!CI->use_empty())
- return nullptr;
-
- // printf("x") -> putchar('x'), even for "%" and "%%".
- if (FormatStr.size() == 1 || FormatStr == "%%")
- return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
-
- // printf("%s", "a") --> putchar('a')
- if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
- StringRef ChrStr;
- if (!getConstantStringInfo(CI->getOperand(1), ChrStr))
- return nullptr;
- if (ChrStr.size() != 1)
- return nullptr;
- return emitPutChar(B.getInt32(ChrStr[0]), B, TLI);
- }
-
- // printf("foo\n") --> puts("foo")
- if (FormatStr[FormatStr.size() - 1] == '\n' &&
- FormatStr.find('%') == StringRef::npos) { // No format characters.
- // Create a string literal with no \n on it. We expect the constant merge
- // pass to be run after this pass, to merge duplicate strings.
- FormatStr = FormatStr.drop_back();
- Value *GV = B.CreateGlobalString(FormatStr, "str");
- return emitPutS(GV, B, TLI);
- }
-
- // Optimize specific format strings.
- // printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isIntegerTy())
- return emitPutChar(CI->getArgOperand(1), B, TLI);
-
- // printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy())
- return emitPutS(CI->getArgOperand(1), B, TLI);
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
-
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (Value *V = optimizePrintFString(CI, B)) {
- return V;
- }
-
- // printf(format, ...) -> iprintf(format, ...) if no floating point
- // arguments.
- if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee IPrintFFn =
- M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(IPrintFFn);
- B.Insert(New);
- return New;
- }
-
- // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
- // arguments.
- if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- auto SmallPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
- FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SmallPrintFFn);
- B.Insert(New);
- return New;
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return nullptr;
-
- // If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 2) {
- // Make sure there's no % in the constant array. We could try to handle
- // %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
- return nullptr; // we found a format specifier, bail out.
-
- // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- FormatStr.size() + 1)); // Copy the null byte.
- return ConstantInt::get(CI->getType(), FormatStr.size());
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return nullptr;
-
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(2)->getType()->isIntegerTy())
- return nullptr;
- Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(CI->getArgOperand(0), B);
- B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
- B.CreateStore(B.getInt8(0), Ptr);
-
- return ConstantInt::get(CI->getType(), 1);
- }
-
- if (FormatStr[1] == 's') {
- // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
- // strlen(str)+1)
- if (!CI->getArgOperand(2)->getType()->isPointerTy())
- return nullptr;
-
- Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
- if (!Len)
- return nullptr;
- Value *IncLen =
- B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
-
- // The sprintf result is the unincremented number of bytes in the string.
- return B.CreateIntCast(Len, CI->getType(), false);
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (Value *V = optimizeSPrintFString(CI, B)) {
- return V;
- }
-
- // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
- // point arguments.
- if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee SIPrintFFn =
- M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SIPrintFFn);
- B.Insert(New);
- return New;
- }
-
- // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
- // floating point arguments.
- if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- auto SmallSPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
- FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SmallSPrintFFn);
- B.Insert(New);
- return New;
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
- return nullptr;
-
- // Check for size
- ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (!Size)
- return nullptr;
-
- uint64_t N = Size->getZExtValue();
-
- // If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 3) {
- // Make sure there's no % in the constant array. We could try to handle
- // %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
- return nullptr; // we found a format specifier, bail out.
-
- if (N == 0)
- return ConstantInt::get(CI->getType(), FormatStr.size());
- else if (N < FormatStr.size() + 1)
- return nullptr;
-
- // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
- // strlen(fmt)+1)
- B.CreateMemCpy(
- CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
- ConstantInt::get(DL.getIntPtrType(CI->getContext()),
- FormatStr.size() + 1)); // Copy the null byte.
- return ConstantInt::get(CI->getType(), FormatStr.size());
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
- CI->getNumArgOperands() == 4) {
-
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- if (N == 0)
- return ConstantInt::get(CI->getType(), 1);
- else if (N == 1)
- return nullptr;
-
- // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(3)->getType()->isIntegerTy())
- return nullptr;
- Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(CI->getArgOperand(0), B);
- B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
- B.CreateStore(B.getInt8(0), Ptr);
-
- return ConstantInt::get(CI->getType(), 1);
- }
-
- if (FormatStr[1] == 's') {
- // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(3), Str))
- return nullptr;
-
- if (N == 0)
- return ConstantInt::get(CI->getType(), Str.size());
- else if (N < Str.size() + 1)
- return nullptr;
-
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
- ConstantInt::get(CI->getType(), Str.size() + 1));
-
- // The snprintf result is the unincremented number of bytes in the string.
- return ConstantInt::get(CI->getType(), Str.size());
- }
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) {
- if (Value *V = optimizeSnPrintFString(CI, B)) {
- return V;
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
- optimizeErrorReporting(CI, B, 0);
-
- // All the optimizations depend on the format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return nullptr;
-
- // Do not do any of the following transformations if the fprintf return
- // value is used, in general the fprintf return value is not compatible
- // with fwrite(), fputc() or fputs().
- if (!CI->use_empty())
- return nullptr;
-
- // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumArgOperands() == 2) {
- // Could handle %% -> % if we cared.
- if (FormatStr.find('%') != StringRef::npos)
- return nullptr; // We found a format specifier.
-
- return emitFWrite(
- CI->getArgOperand(1),
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
- CI->getArgOperand(0), B, DL, TLI);
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return nullptr;
-
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> fputc(chr, F)
- if (!CI->getArgOperand(2)->getType()->isIntegerTy())
- return nullptr;
- return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
- }
-
- if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy())
- return nullptr;
- return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
- if (Value *V = optimizeFPrintFString(CI, B)) {
- return V;
- }
-
- // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
- // floating point arguments.
- if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee FIPrintFFn =
- M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(FIPrintFFn);
- B.Insert(New);
- return New;
- }
-
- // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
- // 128-bit floating point arguments.
- if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- auto SmallFPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
- FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SmallFPrintFFn);
- B.Insert(New);
- return New;
- }
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
- optimizeErrorReporting(CI, B, 3);
-
- // Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (SizeC && CountC) {
- uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
-
- // If this is writing zero records, remove the call (it's a noop).
- if (Bytes == 0)
- return ConstantInt::get(CI->getType(), 0);
-
- // If this is writing one byte, turn it into fputc.
- // This optimisation is only valid, if the return value is unused.
- if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(B.getInt8Ty(),
- castToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
- }
- }
-
- if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
- return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
- TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
- optimizeErrorReporting(CI, B, 1);
-
- // Don't rewrite fputs to fwrite when optimising for size because fwrite
- // requires more arguments and thus extra MOVs are required.
- bool OptForSize = CI->getFunction()->hasOptSize() ||
- llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
- if (OptForSize)
- return nullptr;
-
- // Check if has any use
- if (!CI->use_empty()) {
- if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
- return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
- TLI);
- else
- // We can't optimize if return value is used.
- return nullptr;
- }
-
- // fputs(s,F) --> fwrite(s,strlen(s),1,F)
- uint64_t Len = GetStringLength(CI->getArgOperand(0));
- if (!Len)
- return nullptr;
-
- // Known to have no uses (see above).
- return emitFWrite(
- CI->getArgOperand(0),
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
- CI->getArgOperand(1), B, DL, TLI);
-}
-
-Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) {
- optimizeErrorReporting(CI, B, 1);
-
- if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
- return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
- TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI))
- return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI))
- return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
- if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
- return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
- TLI);
-
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
- if (!CI->use_empty())
- return nullptr;
-
- // Check for a constant string.
- // puts("") -> putchar('\n')
- StringRef Str;
- if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
- return emitPutChar(B.getInt32('\n'), B, TLI);
-
- return nullptr;
-}
-
-bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
- LibFunc Func;
- SmallString<20> FloatFuncName = FuncName;
- FloatFuncName += 'f';
- if (TLI->getLibFunc(FloatFuncName, Func))
- return TLI->has(Func);
- return false;
-}
-
-Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
- IRBuilder<> &Builder) {
- LibFunc Func;
- Function *Callee = CI->getCalledFunction();
- // Check for string/memory library functions.
- if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
- // Make sure we never change the calling convention.
- assert((ignoreCallingConv(Func) ||
- isCallingConvCCompatible(CI)) &&
- "Optimizing string/memory libcall would change the calling convention");
- switch (Func) {
- case LibFunc_strcat:
- return optimizeStrCat(CI, Builder);
- case LibFunc_strncat:
- return optimizeStrNCat(CI, Builder);
- case LibFunc_strchr:
- return optimizeStrChr(CI, Builder);
- case LibFunc_strrchr:
- return optimizeStrRChr(CI, Builder);
- case LibFunc_strcmp:
- return optimizeStrCmp(CI, Builder);
- case LibFunc_strncmp:
- return optimizeStrNCmp(CI, Builder);
- case LibFunc_strcpy:
- return optimizeStrCpy(CI, Builder);
- case LibFunc_stpcpy:
- return optimizeStpCpy(CI, Builder);
- case LibFunc_strncpy:
- return optimizeStrNCpy(CI, Builder);
- case LibFunc_strlen:
- return optimizeStrLen(CI, Builder);
- case LibFunc_strpbrk:
- return optimizeStrPBrk(CI, Builder);
- case LibFunc_strtol:
- case LibFunc_strtod:
- case LibFunc_strtof:
- case LibFunc_strtoul:
- case LibFunc_strtoll:
- case LibFunc_strtold:
- case LibFunc_strtoull:
- return optimizeStrTo(CI, Builder);
- case LibFunc_strspn:
- return optimizeStrSpn(CI, Builder);
- case LibFunc_strcspn:
- return optimizeStrCSpn(CI, Builder);
- case LibFunc_strstr:
- return optimizeStrStr(CI, Builder);
- case LibFunc_memchr:
- return optimizeMemChr(CI, Builder);
- case LibFunc_bcmp:
- return optimizeBCmp(CI, Builder);
- case LibFunc_memcmp:
- return optimizeMemCmp(CI, Builder);
- case LibFunc_memcpy:
- return optimizeMemCpy(CI, Builder);
- case LibFunc_memmove:
- return optimizeMemMove(CI, Builder);
- case LibFunc_memset:
- return optimizeMemSet(CI, Builder);
- case LibFunc_realloc:
- return optimizeRealloc(CI, Builder);
- case LibFunc_wcslen:
- return optimizeWcslen(CI, Builder);
- default:
- break;
- }
- }
- return nullptr;
-}
-
-Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
- LibFunc Func,
- IRBuilder<> &Builder) {
- // Don't optimize calls that require strict floating point semantics.
- if (CI->isStrictFP())
- return nullptr;
-
- if (Value *V = optimizeTrigReflections(CI, Func, Builder))
- return V;
-
- switch (Func) {
- case LibFunc_sinpif:
- case LibFunc_sinpi:
- case LibFunc_cospif:
- case LibFunc_cospi:
- return optimizeSinCosPi(CI, Builder);
- case LibFunc_powf:
- case LibFunc_pow:
- case LibFunc_powl:
- return optimizePow(CI, Builder);
- case LibFunc_exp2l:
- case LibFunc_exp2:
- case LibFunc_exp2f:
- return optimizeExp2(CI, Builder);
- case LibFunc_fabsf:
- case LibFunc_fabs:
- case LibFunc_fabsl:
- return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
- case LibFunc_sqrtf:
- case LibFunc_sqrt:
- case LibFunc_sqrtl:
- return optimizeSqrt(CI, Builder);
- case LibFunc_log:
- case LibFunc_log10:
- case LibFunc_log1p:
- case LibFunc_log2:
- case LibFunc_logb:
- return optimizeLog(CI, Builder);
- case LibFunc_tan:
- case LibFunc_tanf:
- case LibFunc_tanl:
- return optimizeTan(CI, Builder);
- case LibFunc_ceil:
- return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
- case LibFunc_floor:
- return replaceUnaryCall(CI, Builder, Intrinsic::floor);
- case LibFunc_round:
- return replaceUnaryCall(CI, Builder, Intrinsic::round);
- case LibFunc_nearbyint:
- return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
- case LibFunc_rint:
- return replaceUnaryCall(CI, Builder, Intrinsic::rint);
- case LibFunc_trunc:
- return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
- case LibFunc_acos:
- case LibFunc_acosh:
- case LibFunc_asin:
- case LibFunc_asinh:
- case LibFunc_atan:
- case LibFunc_atanh:
- case LibFunc_cbrt:
- case LibFunc_cosh:
- case LibFunc_exp:
- case LibFunc_exp10:
- case LibFunc_expm1:
- case LibFunc_cos:
- case LibFunc_sin:
- case LibFunc_sinh:
- case LibFunc_tanh:
- if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName()))
- return optimizeUnaryDoubleFP(CI, Builder, true);
- return nullptr;
- case LibFunc_copysign:
- if (hasFloatVersion(CI->getCalledFunction()->getName()))
- return optimizeBinaryDoubleFP(CI, Builder);
- return nullptr;
- case LibFunc_fminf:
- case LibFunc_fmin:
- case LibFunc_fminl:
- case LibFunc_fmaxf:
- case LibFunc_fmax:
- case LibFunc_fmaxl:
- return optimizeFMinFMax(CI, Builder);
- case LibFunc_cabs:
- case LibFunc_cabsf:
- case LibFunc_cabsl:
- return optimizeCAbs(CI, Builder);
- default:
- return nullptr;
- }
-}
-
-Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- // TODO: Split out the code below that operates on FP calls so that
- // we can all non-FP calls with the StrictFP attribute to be
- // optimized.
- if (CI->isNoBuiltin())
- return nullptr;
-
- LibFunc Func;
- Function *Callee = CI->getCalledFunction();
-
- SmallVector<OperandBundleDef, 2> OpBundles;
- CI->getOperandBundlesAsDefs(OpBundles);
- IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = isCallingConvCCompatible(CI);
-
- // Command-line parameter overrides instruction attribute.
- // This can't be moved to optimizeFloatingPointLibCall() because it may be
- // used by the intrinsic optimizations.
- if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
- UnsafeFPShrink = EnableUnsafeFPShrink;
- else if (isa<FPMathOperator>(CI) && CI->isFast())
- UnsafeFPShrink = true;
-
- // First, check for intrinsics.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- if (!isCallingConvC)
- return nullptr;
- // The FP intrinsics have corresponding constrained versions so we don't
- // need to check for the StrictFP attribute here.
- switch (II->getIntrinsicID()) {
- case Intrinsic::pow:
- return optimizePow(CI, Builder);
- case Intrinsic::exp2:
- return optimizeExp2(CI, Builder);
- case Intrinsic::log:
- return optimizeLog(CI, Builder);
- case Intrinsic::sqrt:
- return optimizeSqrt(CI, Builder);
- // TODO: Use foldMallocMemset() with memset intrinsic.
- default:
- return nullptr;
- }
- }
-
- // Also try to simplify calls to fortified library functions.
- if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) {
- // Try to further simplify the result.
- CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
- if (SimplifiedCI && SimplifiedCI->getCalledFunction()) {
- // Use an IR Builder from SimplifiedCI if available instead of CI
- // to guarantee we reach all uses we might replace later on.
- IRBuilder<> TmpBuilder(SimplifiedCI);
- if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
- // If we were able to further simplify, remove the now redundant call.
- SimplifiedCI->replaceAllUsesWith(V);
- eraseFromParent(SimplifiedCI);
- return V;
- }
- }
- return SimplifiedFortifiedCI;
- }
-
- // Then check for known library functions.
- if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
- // We never change the calling convention.
- if (!ignoreCallingConv(Func) && !isCallingConvC)
- return nullptr;
- if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
- return V;
- if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
- return V;
- switch (Func) {
- case LibFunc_ffs:
- case LibFunc_ffsl:
- case LibFunc_ffsll:
- return optimizeFFS(CI, Builder);
- case LibFunc_fls:
- case LibFunc_flsl:
- case LibFunc_flsll:
- return optimizeFls(CI, Builder);
- case LibFunc_abs:
- case LibFunc_labs:
- case LibFunc_llabs:
- return optimizeAbs(CI, Builder);
- case LibFunc_isdigit:
- return optimizeIsDigit(CI, Builder);
- case LibFunc_isascii:
- return optimizeIsAscii(CI, Builder);
- case LibFunc_toascii:
- return optimizeToAscii(CI, Builder);
- case LibFunc_atoi:
- case LibFunc_atol:
- case LibFunc_atoll:
- return optimizeAtoi(CI, Builder);
- case LibFunc_strtol:
- case LibFunc_strtoll:
- return optimizeStrtol(CI, Builder);
- case LibFunc_printf:
- return optimizePrintF(CI, Builder);
- case LibFunc_sprintf:
- return optimizeSPrintF(CI, Builder);
- case LibFunc_snprintf:
- return optimizeSnPrintF(CI, Builder);
- case LibFunc_fprintf:
- return optimizeFPrintF(CI, Builder);
- case LibFunc_fwrite:
- return optimizeFWrite(CI, Builder);
- case LibFunc_fread:
- return optimizeFRead(CI, Builder);
- case LibFunc_fputs:
- return optimizeFPuts(CI, Builder);
- case LibFunc_fgets:
- return optimizeFGets(CI, Builder);
- case LibFunc_fputc:
- return optimizeFPutc(CI, Builder);
- case LibFunc_fgetc:
- return optimizeFGetc(CI, Builder);
- case LibFunc_puts:
- return optimizePuts(CI, Builder);
- case LibFunc_perror:
- return optimizeErrorReporting(CI, Builder);
- case LibFunc_vfprintf:
- case LibFunc_fiprintf:
- return optimizeErrorReporting(CI, Builder, 0);
- default:
- return nullptr;
- }
- }
- return nullptr;
-}
-
-LibCallSimplifier::LibCallSimplifier(
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- function_ref<void(Instruction *, Value *)> Replacer,
- function_ref<void(Instruction *)> Eraser)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
- UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
-
-void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
- // Indirect through the replacer used in this instance.
- Replacer(I, With);
-}
-
-void LibCallSimplifier::eraseFromParent(Instruction *I) {
- Eraser(I);
-}
-
-// TODO:
-// Additional cases that we need to add to this file:
-//
-// cbrt:
-// * cbrt(expN(X)) -> expN(x/3)
-// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(cbrt(x)) -> pow(x,1/9)
-//
-// exp, expf, expl:
-// * exp(log(x)) -> x
-//
-// log, logf, logl:
-// * log(exp(x)) -> x
-// * log(exp(y)) -> y*log(e)
-// * log(exp10(y)) -> y*log(10)
-// * log(sqrt(x)) -> 0.5*log(x)
-//
-// pow, powf, powl:
-// * pow(sqrt(x),y) -> pow(x,y*0.5)
-// * pow(pow(x,y),z)-> pow(x,y*z)
-//
-// signbit:
-// * signbit(cnst) -> cnst'
-// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
-//
-// sqrt, sqrtf, sqrtl:
-// * sqrt(expN(x)) -> expN(x*0.5)
-// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
-// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
-//
-
-//===----------------------------------------------------------------------===//
-// Fortified Library Call Optimizations
-//===----------------------------------------------------------------------===//
-
-bool
-FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
- unsigned ObjSizeOp,
- Optional<unsigned> SizeOp,
- Optional<unsigned> StrOp,
- Optional<unsigned> FlagOp) {
- // If this function takes a flag argument, the implementation may use it to
- // perform extra checks. Don't fold into the non-checking variant.
- if (FlagOp) {
- ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
- if (!Flag || !Flag->isZero())
- return false;
- }
-
- if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
- return true;
-
- if (ConstantInt *ObjSizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
- if (ObjSizeCI->isMinusOne())
- return true;
- // If the object size wasn't -1 (unknown), bail out if we were asked to.
- if (OnlyLowerUnknownSize)
- return false;
- if (StrOp) {
- uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0)
- return false;
- return ObjSizeCI->getZExtValue() >= Len;
- }
-
- if (SizeOp) {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
- return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
- }
- }
- return false;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2)) {
- B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
- CI->getArgOperand(2));
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
- IRBuilder<> &B) {
- // TODO: Try foldMallocMemset() here.
-
- if (isFortifiedCallFoldable(CI, 3, 2)) {
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
- IRBuilder<> &B,
- LibFunc Func) {
- const DataLayout &DL = CI->getModule()->getDataLayout();
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
- *ObjSize = CI->getArgOperand(2);
-
- // __stpcpy_chk(x,x,...) -> x+strlen(x)
- if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
- Value *StrLen = emitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
- }
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
- // st[rp]cpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, None, 1)) {
- if (Func == LibFunc_strcpy_chk)
- return emitStrCpy(Dst, Src, B, TLI);
- else
- return emitStpCpy(Dst, Src, B, TLI);
- }
-
- if (OnlyLowerUnknownSize)
- return nullptr;
-
- // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
-
- Type *SizeTTy = DL.getIntPtrType(CI->getContext());
- Value *LenV = ConstantInt::get(SizeTTy, Len);
- Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
- // If the function was an __stpcpy_chk, and we were able to fold it into
- // a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc_stpcpy_chk)
- return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
- return Ret;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
- IRBuilder<> &B,
- LibFunc Func) {
- if (isFortifiedCallFoldable(CI, 3, 2)) {
- if (Func == LibFunc_strncpy_chk)
- return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
- else
- return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
- }
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 4, 3))
- return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
- return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4), VariadicArgs, B, TLI);
- }
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
- SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
- return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
- B, TLI);
- }
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 2))
- return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3))
- return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3))
- return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3))
- return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
- return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
- IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 2, None, None, 1))
- return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
- CI->getArgOperand(4), B, TLI);
-
- return nullptr;
-}
-
-Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
- // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
- // Some clang users checked for _chk libcall availability using:
- // __has_builtin(__builtin___memcpy_chk)
- // When compiling with -fno-builtin, this is always true.
- // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
- // end up with fortified libcalls, which isn't acceptable in a freestanding
- // environment which only provides their non-fortified counterparts.
- //
- // Until we change clang and/or teach external users to check for availability
- // differently, disregard the "nobuiltin" attribute and TLI::has.
- //
- // PR23093.
-
- LibFunc Func;
- Function *Callee = CI->getCalledFunction();
-
- SmallVector<OperandBundleDef, 2> OpBundles;
- CI->getOperandBundlesAsDefs(OpBundles);
- IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = isCallingConvCCompatible(CI);
-
- // First, check that this is a known library functions and that the prototype
- // is correct.
- if (!TLI->getLibFunc(*Callee, Func))
- return nullptr;
-
- // We never change the calling convention.
- if (!ignoreCallingConv(Func) && !isCallingConvC)
- return nullptr;
-
- switch (Func) {
- case LibFunc_memcpy_chk:
- return optimizeMemCpyChk(CI, Builder);
- case LibFunc_memmove_chk:
- return optimizeMemMoveChk(CI, Builder);
- case LibFunc_memset_chk:
- return optimizeMemSetChk(CI, Builder);
- case LibFunc_stpcpy_chk:
- case LibFunc_strcpy_chk:
- return optimizeStrpCpyChk(CI, Builder, Func);
- case LibFunc_stpncpy_chk:
- case LibFunc_strncpy_chk:
- return optimizeStrpNCpyChk(CI, Builder, Func);
- case LibFunc_memccpy_chk:
- return optimizeMemCCpyChk(CI, Builder);
- case LibFunc_snprintf_chk:
- return optimizeSNPrintfChk(CI, Builder);
- case LibFunc_sprintf_chk:
- return optimizeSPrintfChk(CI, Builder);
- case LibFunc_strcat_chk:
- return optimizeStrCatChk(CI, Builder);
- case LibFunc_strlcat_chk:
- return optimizeStrLCat(CI, Builder);
- case LibFunc_strncat_chk:
- return optimizeStrNCatChk(CI, Builder);
- case LibFunc_strlcpy_chk:
- return optimizeStrLCpyChk(CI, Builder);
- case LibFunc_vsnprintf_chk:
- return optimizeVSNPrintfChk(CI, Builder);
- case LibFunc_vsprintf_chk:
- return optimizeVSPrintfChk(CI, Builder);
- default:
- break;
- }
- return nullptr;
-}
-
-FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
- const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
- : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp
deleted file mode 100644
index 1519751197d2..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- SizeOpts.cpp - code size optimization related code ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains some shared code size optimization related code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/SizeOpts.h"
-using namespace llvm;
-
-static cl::opt<bool> ProfileGuidedSizeOpt(
- "pgso", cl::Hidden, cl::init(true),
- cl::desc("Enable the profile guided size optimization. "));
-
-bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
- assert(F);
- if (!PSI || !BFI || !PSI->hasProfileSummary())
- return false;
- return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
-}
-
-bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
- assert(BB);
- if (!PSI || !BFI || !PSI->hasProfileSummary())
- return false;
- return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
deleted file mode 100644
index e2c387cb8983..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-//===- SplitModule.cpp - Split a module into partitions -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the function llvm::SplitModule, which splits a module
-// into multiple linkable partitions. It can be used to implement parallel code
-// generation for link-time optimization.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SplitModule.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Comdat.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MD5.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <iterator>
-#include <memory>
-#include <queue>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "split-module"
-
-namespace {
-
-using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
-using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
-using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
-
-} // end anonymous namespace
-
-static void addNonConstUser(ClusterMapType &GVtoClusterMap,
- const GlobalValue *GV, const User *U) {
- assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
-
- if (const Instruction *I = dyn_cast<Instruction>(U)) {
- const GlobalValue *F = I->getParent()->getParent();
- GVtoClusterMap.unionSets(GV, F);
- } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
- isa<GlobalVariable>(U)) {
- GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
- } else {
- llvm_unreachable("Underimplemented use case");
- }
-}
-
-// Adds all GlobalValue users of V to the same cluster as GV.
-static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
- const GlobalValue *GV, const Value *V) {
- for (auto *U : V->users()) {
- SmallVector<const User *, 4> Worklist;
- Worklist.push_back(U);
- while (!Worklist.empty()) {
- const User *UU = Worklist.pop_back_val();
- // For each constant that is not a GV (a pure const) recurse.
- if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
- Worklist.append(UU->user_begin(), UU->user_end());
- continue;
- }
- addNonConstUser(GVtoClusterMap, GV, UU);
- }
- }
-}
-
-// Find partitions for module in the way that no locals need to be
-// globalized.
-// Try to balance pack those partitions into N files since this roughly equals
-// thread balancing for the backend codegen step.
-static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
- unsigned N) {
- // At this point module should have the proper mix of globals and locals.
- // As we attempt to partition this module, we must not change any
- // locals to globals.
- LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
- << ")functions\n");
- ClusterMapType GVtoClusterMap;
- ComdatMembersType ComdatMembers;
-
- auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
- if (GV.isDeclaration())
- return;
-
- if (!GV.hasName())
- GV.setName("__llvmsplit_unnamed");
-
- // Comdat groups must not be partitioned. For comdat groups that contain
- // locals, record all their members here so we can keep them together.
- // Comdat groups that only contain external globals are already handled by
- // the MD5-based partitioning.
- if (const Comdat *C = GV.getComdat()) {
- auto &Member = ComdatMembers[C];
- if (Member)
- GVtoClusterMap.unionSets(Member, &GV);
- else
- Member = &GV;
- }
-
- // For aliases we should not separate them from their aliasees regardless
- // of linkage.
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
- if (const GlobalObject *Base = GIS->getBaseObject())
- GVtoClusterMap.unionSets(&GV, Base);
- }
-
- if (const Function *F = dyn_cast<Function>(&GV)) {
- for (const BasicBlock &BB : *F) {
- BlockAddress *BA = BlockAddress::lookup(&BB);
- if (!BA || !BA->isConstantUsed())
- continue;
- addAllGlobalValueUsers(GVtoClusterMap, F, BA);
- }
- }
-
- if (GV.hasLocalLinkage())
- addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
- };
-
- llvm::for_each(M->functions(), recordGVSet);
- llvm::for_each(M->globals(), recordGVSet);
- llvm::for_each(M->aliases(), recordGVSet);
-
- // Assigned all GVs to merged clusters while balancing number of objects in
- // each.
- auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
- const std::pair<unsigned, unsigned> &b) {
- if (a.second || b.second)
- return a.second > b.second;
- else
- return a.first > b.first;
- };
-
- std::priority_queue<std::pair<unsigned, unsigned>,
- std::vector<std::pair<unsigned, unsigned>>,
- decltype(CompareClusters)>
- BalancinQueue(CompareClusters);
- // Pre-populate priority queue with N slot blanks.
- for (unsigned i = 0; i < N; ++i)
- BalancinQueue.push(std::make_pair(i, 0));
-
- using SortType = std::pair<unsigned, ClusterMapType::iterator>;
-
- SmallVector<SortType, 64> Sets;
- SmallPtrSet<const GlobalValue *, 32> Visited;
-
- // To guarantee determinism, we have to sort SCC according to size.
- // When size is the same, use leader's name.
- for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
- E = GVtoClusterMap.end(); I != E; ++I)
- if (I->isLeader())
- Sets.push_back(
- std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
- GVtoClusterMap.member_end()), I));
-
- llvm::sort(Sets, [](const SortType &a, const SortType &b) {
- if (a.first == b.first)
- return a.second->getData()->getName() > b.second->getData()->getName();
- else
- return a.first > b.first;
- });
-
- for (auto &I : Sets) {
- unsigned CurrentClusterID = BalancinQueue.top().first;
- unsigned CurrentClusterSize = BalancinQueue.top().second;
- BalancinQueue.pop();
-
- LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
- << I.first << ") ----> " << I.second->getData()->getName()
- << "\n");
-
- for (ClusterMapType::member_iterator MI =
- GVtoClusterMap.findLeader(I.second);
- MI != GVtoClusterMap.member_end(); ++MI) {
- if (!Visited.insert(*MI).second)
- continue;
- LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
- << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
- Visited.insert(*MI);
- ClusterIDMap[*MI] = CurrentClusterID;
- CurrentClusterSize++;
- }
- // Add this set size to the number of entries in this cluster.
- BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
- }
-}
-
-static void externalize(GlobalValue *GV) {
- if (GV->hasLocalLinkage()) {
- GV->setLinkage(GlobalValue::ExternalLinkage);
- GV->setVisibility(GlobalValue::HiddenVisibility);
- }
-
- // Unnamed entities must be named consistently between modules. setName will
- // give a distinct name to each such entity.
- if (!GV->hasName())
- GV->setName("__llvmsplit_unnamed");
-}
-
-// Returns whether GV should be in partition (0-based) I of N.
-static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
- if (const GlobalObject *Base = GIS->getBaseObject())
- GV = Base;
-
- StringRef Name;
- if (const Comdat *C = GV->getComdat())
- Name = C->getName();
- else
- Name = GV->getName();
-
- // Partition by MD5 hash. We only need a few bits for evenness as the number
- // of partitions will generally be in the 1-2 figure range; the low 16 bits
- // are enough.
- MD5 H;
- MD5::MD5Result R;
- H.update(Name);
- H.final(R);
- return (R[0] | (R[1] << 8)) % N == I;
-}
-
-void llvm::SplitModule(
- std::unique_ptr<Module> M, unsigned N,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
- bool PreserveLocals) {
- if (!PreserveLocals) {
- for (Function &F : *M)
- externalize(&F);
- for (GlobalVariable &GV : M->globals())
- externalize(&GV);
- for (GlobalAlias &GA : M->aliases())
- externalize(&GA);
- for (GlobalIFunc &GIF : M->ifuncs())
- externalize(&GIF);
- }
-
- // This performs splitting without a need for externalization, which might not
- // always be possible.
- ClusterIDMapType ClusterIDMap;
- findPartitions(M.get(), ClusterIDMap, N);
-
- // FIXME: We should be able to reuse M as the last partition instead of
- // cloning it.
- for (unsigned I = 0; I < N; ++I) {
- ValueToValueMapTy VMap;
- std::unique_ptr<Module> MPart(
- CloneModule(*M, VMap, [&](const GlobalValue *GV) {
- if (ClusterIDMap.count(GV))
- return (ClusterIDMap[GV] == I);
- else
- return isInPartition(GV, I, N);
- }));
- if (I != 0)
- MPart->setModuleInlineAsm("");
- ModuleCallback(std::move(MPart));
- }
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
deleted file mode 100644
index 50844cf9d1c5..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a little utility pass that removes the gc.relocates inserted by
-// RewriteStatepointsForGC. Note that the generated IR is incorrect,
-// but this is useful as a single pass in itself, for analysis of IR, without
-// the GC.relocates. The statepoint and gc.result instrinsics would still be
-// present.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Statepoint.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-struct StripGCRelocates : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- StripGCRelocates() : FunctionPass(ID) {
- initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {}
-
- bool runOnFunction(Function &F) override;
-
-};
-char StripGCRelocates::ID = 0;
-}
-
-bool StripGCRelocates::runOnFunction(Function &F) {
- // Nothing to do for declarations.
- if (F.isDeclaration())
- return false;
- SmallVector<GCRelocateInst *, 20> GCRelocates;
- // TODO: We currently do not handle gc.relocates that are in landing pads,
- // i.e. not bound to a single statepoint token.
- for (Instruction &I : instructions(F)) {
- if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
- if (isStatepoint(GCR->getOperand(0)))
- GCRelocates.push_back(GCR);
- }
- // All gc.relocates are bound to a single statepoint token. The order of
- // visiting gc.relocates for deletion does not matter.
- for (GCRelocateInst *GCRel : GCRelocates) {
- Value *OrigPtr = GCRel->getDerivedPtr();
- Value *ReplaceGCRel = OrigPtr;
-
- // All gc_relocates are i8 addrspace(1)* typed, we need a bitcast from i8
- // addrspace(1)* to the type of the OrigPtr, if the are not the same.
- if (GCRel->getType() != OrigPtr->getType())
- ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel);
-
- // Replace all uses of gc.relocate and delete the gc.relocate
- // There maybe unncessary bitcasts back to the OrigPtr type, an instcombine
- // pass would clear this up.
- GCRel->replaceAllUsesWith(ReplaceGCRel);
- GCRel->eraseFromParent();
- }
- return !GCRelocates.empty();
-}
-
-INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
- "Strip gc.relocates inserted through RewriteStatepointsForGC",
- true, false)
diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
deleted file mode 100644
index 97a4533fabe5..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-namespace {
-
-/// This pass strips all debug info that is not related line tables.
-/// The result will be the same as if the program where compiled with
-/// -gline-tables-only.
-struct StripNonLineTableDebugInfo : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- StripNonLineTableDebugInfo() : ModulePass(ID) {
- initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- return llvm::stripNonLineTableDebugInfo(M);
- }
-};
-}
-
-char StripNonLineTableDebugInfo::ID = 0;
-INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo",
- "Strip all debug info except linetables", false, false)
-
-ModulePass *llvm::createStripNonLineTableDebugInfoPass() {
- return new StripNonLineTableDebugInfo();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
deleted file mode 100644
index 456724779b43..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ /dev/null
@@ -1,584 +0,0 @@
-//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// SymbolRewriter is a LLVM pass which can rewrite symbols transparently within
-// existing code. It is implemented as a compiler pass and is configured via a
-// YAML configuration file.
-//
-// The YAML configuration file format is as follows:
-//
-// RewriteMapFile := RewriteDescriptors
-// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
-// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
-// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
-// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
-// RewriteDescriptorType := Identifier
-// FieldIdentifier := Identifier
-// FieldValue := Identifier
-// Identifier := [0-9a-zA-Z]+
-//
-// Currently, the following descriptor types are supported:
-//
-// - function: (function rewriting)
-// + Source (original name of the function)
-// + Target (explicit transformation)
-// + Transform (pattern transformation)
-// + Naked (boolean, whether the function is undecorated)
-// - global variable: (external linkage global variable rewriting)
-// + Source (original name of externally visible variable)
-// + Target (explicit transformation)
-// + Transform (pattern transformation)
-// - global alias: (global alias rewriting)
-// + Source (original name of the aliased name)
-// + Target (explicit transformation)
-// + Transform (pattern transformation)
-//
-// Note that source and exactly one of [Target, Transform] must be provided
-//
-// New rewrite descriptors can be created. Addding a new rewrite descriptor
-// involves:
-//
-// a) extended the rewrite descriptor kind enumeration
-// (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
-// b) implementing the new descriptor
-// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
-// c) extending the rewrite map parser
-// (<anonymous>::RewriteMapParser::parseEntry)
-//
-// Specify to rewrite the symbols using the `-rewrite-symbols` option, and
-// specify the map file to use for the rewriting via the `-rewrite-map-file`
-// option.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SymbolRewriter.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/IR/Comdat.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/YAMLParser.h"
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace llvm;
-using namespace SymbolRewriter;
-
-#define DEBUG_TYPE "symbol-rewriter"
-
-static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
- cl::desc("Symbol Rewrite Map"),
- cl::value_desc("filename"),
- cl::Hidden);
-
-static void rewriteComdat(Module &M, GlobalObject *GO,
- const std::string &Source,
- const std::string &Target) {
- if (Comdat *CD = GO->getComdat()) {
- auto &Comdats = M.getComdatSymbolTable();
-
- Comdat *C = M.getOrInsertComdat(Target);
- C->setSelectionKind(CD->getSelectionKind());
- GO->setComdat(C);
-
- Comdats.erase(Comdats.find(Source));
- }
-}
-
-namespace {
-
-template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(Module::*Get)(StringRef) const>
-class ExplicitRewriteDescriptor : public RewriteDescriptor {
-public:
- const std::string Source;
- const std::string Target;
-
- ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
- : RewriteDescriptor(DT), Source(Naked ? StringRef("\01" + S.str()) : S),
- Target(T) {}
-
- bool performOnModule(Module &M) override;
-
- static bool classof(const RewriteDescriptor *RD) {
- return RD->getType() == DT;
- }
-};
-
-} // end anonymous namespace
-
-template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(Module::*Get)(StringRef) const>
-bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
- bool Changed = false;
- if (ValueType *S = (M.*Get)(Source)) {
- if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
- rewriteComdat(M, GO, Source, Target);
-
- if (Value *T = (M.*Get)(Target))
- S->setValueName(T->getValueName());
- else
- S->setName(Target);
-
- Changed = true;
- }
- return Changed;
-}
-
-namespace {
-
-template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(Module::*Get)(StringRef) const,
- iterator_range<typename iplist<ValueType>::iterator>
- (Module::*Iterator)()>
-class PatternRewriteDescriptor : public RewriteDescriptor {
-public:
- const std::string Pattern;
- const std::string Transform;
-
- PatternRewriteDescriptor(StringRef P, StringRef T)
- : RewriteDescriptor(DT), Pattern(P), Transform(T) { }
-
- bool performOnModule(Module &M) override;
-
- static bool classof(const RewriteDescriptor *RD) {
- return RD->getType() == DT;
- }
-};
-
-} // end anonymous namespace
-
-template <RewriteDescriptor::Type DT, typename ValueType,
- ValueType *(Module::*Get)(StringRef) const,
- iterator_range<typename iplist<ValueType>::iterator>
- (Module::*Iterator)()>
-bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
-performOnModule(Module &M) {
- bool Changed = false;
- for (auto &C : (M.*Iterator)()) {
- std::string Error;
-
- std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
- if (!Error.empty())
- report_fatal_error("unable to transforn " + C.getName() + " in " +
- M.getModuleIdentifier() + ": " + Error);
-
- if (C.getName() == Name)
- continue;
-
- if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
- rewriteComdat(M, GO, C.getName(), Name);
-
- if (Value *V = (M.*Get)(Name))
- C.setValueName(V->getValueName());
- else
- C.setName(Name);
-
- Changed = true;
- }
- return Changed;
-}
-
-namespace {
-
-/// Represents a rewrite for an explicitly named (function) symbol. Both the
-/// source function name and target function name of the transformation are
-/// explicitly spelt out.
-using ExplicitRewriteFunctionDescriptor =
- ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
- &Module::getFunction>;
-
-/// Represents a rewrite for an explicitly named (global variable) symbol. Both
-/// the source variable name and target variable name are spelt out. This
-/// applies only to module level variables.
-using ExplicitRewriteGlobalVariableDescriptor =
- ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
- GlobalVariable, &Module::getGlobalVariable>;
-
-/// Represents a rewrite for an explicitly named global alias. Both the source
-/// and target name are explicitly spelt out.
-using ExplicitRewriteNamedAliasDescriptor =
- ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
- &Module::getNamedAlias>;
-
-/// Represents a rewrite for a regular expression based pattern for functions.
-/// A pattern for the function name is provided and a transformation for that
-/// pattern to determine the target function name create the rewrite rule.
-using PatternRewriteFunctionDescriptor =
- PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
- &Module::getFunction, &Module::functions>;
-
-/// Represents a rewrite for a global variable based upon a matching pattern.
-/// Each global variable matching the provided pattern will be transformed as
-/// described in the transformation pattern for the target. Applies only to
-/// module level variables.
-using PatternRewriteGlobalVariableDescriptor =
- PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
- GlobalVariable, &Module::getGlobalVariable,
- &Module::globals>;
-
-/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
-/// aliases which match a given pattern. The provided transformation will be
-/// applied to each of the matching names.
-using PatternRewriteNamedAliasDescriptor =
- PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
- &Module::getNamedAlias, &Module::aliases>;
-
-} // end anonymous namespace
-
-bool RewriteMapParser::parse(const std::string &MapFile,
- RewriteDescriptorList *DL) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
- MemoryBuffer::getFile(MapFile);
-
- if (!Mapping)
- report_fatal_error("unable to read rewrite map '" + MapFile + "': " +
- Mapping.getError().message());
-
- if (!parse(*Mapping, DL))
- report_fatal_error("unable to parse rewrite map '" + MapFile + "'");
-
- return true;
-}
-
-bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
- RewriteDescriptorList *DL) {
- SourceMgr SM;
- yaml::Stream YS(MapFile->getBuffer(), SM);
-
- for (auto &Document : YS) {
- yaml::MappingNode *DescriptorList;
-
- // ignore empty documents
- if (isa<yaml::NullNode>(Document.getRoot()))
- continue;
-
- DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
- if (!DescriptorList) {
- YS.printError(Document.getRoot(), "DescriptorList node must be a map");
- return false;
- }
-
- for (auto &Descriptor : *DescriptorList)
- if (!parseEntry(YS, Descriptor, DL))
- return false;
- }
-
- return true;
-}
-
-bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
- RewriteDescriptorList *DL) {
- yaml::ScalarNode *Key;
- yaml::MappingNode *Value;
- SmallString<32> KeyStorage;
- StringRef RewriteType;
-
- Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
- if (!Key) {
- YS.printError(Entry.getKey(), "rewrite type must be a scalar");
- return false;
- }
-
- Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
- if (!Value) {
- YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
- return false;
- }
-
- RewriteType = Key->getValue(KeyStorage);
- if (RewriteType.equals("function"))
- return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
- else if (RewriteType.equals("global variable"))
- return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
- else if (RewriteType.equals("global alias"))
- return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
-
- YS.printError(Entry.getKey(), "unknown rewrite type");
- return false;
-}
-
-bool RewriteMapParser::
-parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
- yaml::MappingNode *Descriptor,
- RewriteDescriptorList *DL) {
- bool Naked = false;
- std::string Source;
- std::string Target;
- std::string Transform;
-
- for (auto &Field : *Descriptor) {
- yaml::ScalarNode *Key;
- yaml::ScalarNode *Value;
- SmallString<32> KeyStorage;
- SmallString<32> ValueStorage;
- StringRef KeyValue;
-
- Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
- if (!Key) {
- YS.printError(Field.getKey(), "descriptor key must be a scalar");
- return false;
- }
-
- Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
- if (!Value) {
- YS.printError(Field.getValue(), "descriptor value must be a scalar");
- return false;
- }
-
- KeyValue = Key->getValue(KeyStorage);
- if (KeyValue.equals("source")) {
- std::string Error;
-
- Source = Value->getValue(ValueStorage);
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
- } else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
- } else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
- } else if (KeyValue.equals("naked")) {
- std::string Undecorated;
-
- Undecorated = Value->getValue(ValueStorage);
- Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
- } else {
- YS.printError(Field.getKey(), "unknown key for function");
- return false;
- }
- }
-
- if (Transform.empty() == Target.empty()) {
- YS.printError(Descriptor,
- "exactly one of transform or target must be specified");
- return false;
- }
-
- // TODO see if there is a more elegant solution to selecting the rewrite
- // descriptor type
- if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
- Source, Target, Naked));
- else
- DL->push_back(
- llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
-
- return true;
-}
-
-bool RewriteMapParser::
-parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
- yaml::MappingNode *Descriptor,
- RewriteDescriptorList *DL) {
- std::string Source;
- std::string Target;
- std::string Transform;
-
- for (auto &Field : *Descriptor) {
- yaml::ScalarNode *Key;
- yaml::ScalarNode *Value;
- SmallString<32> KeyStorage;
- SmallString<32> ValueStorage;
- StringRef KeyValue;
-
- Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
- if (!Key) {
- YS.printError(Field.getKey(), "descriptor Key must be a scalar");
- return false;
- }
-
- Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
- if (!Value) {
- YS.printError(Field.getValue(), "descriptor value must be a scalar");
- return false;
- }
-
- KeyValue = Key->getValue(KeyStorage);
- if (KeyValue.equals("source")) {
- std::string Error;
-
- Source = Value->getValue(ValueStorage);
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
- } else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
- } else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
- } else {
- YS.printError(Field.getKey(), "unknown Key for Global Variable");
- return false;
- }
- }
-
- if (Transform.empty() == Target.empty()) {
- YS.printError(Descriptor,
- "exactly one of transform or target must be specified");
- return false;
- }
-
- if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
- Source, Target,
- /*Naked*/ false));
- else
- DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
- Source, Transform));
-
- return true;
-}
-
-bool RewriteMapParser::
-parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
- yaml::MappingNode *Descriptor,
- RewriteDescriptorList *DL) {
- std::string Source;
- std::string Target;
- std::string Transform;
-
- for (auto &Field : *Descriptor) {
- yaml::ScalarNode *Key;
- yaml::ScalarNode *Value;
- SmallString<32> KeyStorage;
- SmallString<32> ValueStorage;
- StringRef KeyValue;
-
- Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
- if (!Key) {
- YS.printError(Field.getKey(), "descriptor key must be a scalar");
- return false;
- }
-
- Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
- if (!Value) {
- YS.printError(Field.getValue(), "descriptor value must be a scalar");
- return false;
- }
-
- KeyValue = Key->getValue(KeyStorage);
- if (KeyValue.equals("source")) {
- std::string Error;
-
- Source = Value->getValue(ValueStorage);
- if (!Regex(Source).isValid(Error)) {
- YS.printError(Field.getKey(), "invalid regex: " + Error);
- return false;
- }
- } else if (KeyValue.equals("target")) {
- Target = Value->getValue(ValueStorage);
- } else if (KeyValue.equals("transform")) {
- Transform = Value->getValue(ValueStorage);
- } else {
- YS.printError(Field.getKey(), "unknown key for Global Alias");
- return false;
- }
- }
-
- if (Transform.empty() == Target.empty()) {
- YS.printError(Descriptor,
- "exactly one of transform or target must be specified");
- return false;
- }
-
- if (!Target.empty())
- DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
- Source, Target,
- /*Naked*/ false));
- else
- DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
- Source, Transform));
-
- return true;
-}
-
-namespace {
-
-class RewriteSymbolsLegacyPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
-
- RewriteSymbolsLegacyPass();
- RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL);
-
- bool runOnModule(Module &M) override;
-
-private:
- RewriteSymbolPass Impl;
-};
-
-} // end anonymous namespace
-
-char RewriteSymbolsLegacyPass::ID = 0;
-
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
- initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
-}
-
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
- SymbolRewriter::RewriteDescriptorList &DL)
- : ModulePass(ID), Impl(DL) {}
-
-bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
- return Impl.runImpl(M);
-}
-
-PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (!runImpl(M))
- return PreservedAnalyses::all();
-
- return PreservedAnalyses::none();
-}
-
-bool RewriteSymbolPass::runImpl(Module &M) {
- bool Changed;
-
- Changed = false;
- for (auto &Descriptor : Descriptors)
- Changed |= Descriptor->performOnModule(M);
-
- return Changed;
-}
-
-void RewriteSymbolPass::loadAndParseMapFiles() {
- const std::vector<std::string> MapFiles(RewriteMapFiles);
- SymbolRewriter::RewriteMapParser Parser;
-
- for (const auto &MapFile : MapFiles)
- Parser.parse(MapFile, &Descriptors);
-}
-
-INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
- false, false)
-
-ModulePass *llvm::createRewriteSymbolsPass() {
- return new RewriteSymbolsLegacyPass();
-}
-
-ModulePass *
-llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
- return new RewriteSymbolsLegacyPass(DL);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
deleted file mode 100644
index 7f7bdf8a3d6d..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to ensure that functions have at most one return
-// instruction in them. Additionally, it keeps track of which node is the new
-// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode
-// method will return a null pointer.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-char UnifyFunctionExitNodes::ID = 0;
-INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
- "Unify function exit nodes", false, false)
-
-Pass *llvm::createUnifyFunctionExitNodesPass() {
- return new UnifyFunctionExitNodes();
-}
-
-void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
- // We preserve the non-critical-edgeness property
- AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
-}
-
-// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
-// BasicBlock, and converting all returns to unconditional branches to this
-// new basic block. The singular exit node is returned.
-//
-// If there are no return stmts in the Function, a null pointer is returned.
-//
-bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
- // Loop over all of the blocks in a function, tracking all of the blocks that
- // return.
- //
- std::vector<BasicBlock*> ReturningBlocks;
- std::vector<BasicBlock*> UnreachableBlocks;
- for (BasicBlock &I : F)
- if (isa<ReturnInst>(I.getTerminator()))
- ReturningBlocks.push_back(&I);
- else if (isa<UnreachableInst>(I.getTerminator()))
- UnreachableBlocks.push_back(&I);
-
- // Then unreachable blocks.
- if (UnreachableBlocks.empty()) {
- UnreachableBlock = nullptr;
- } else if (UnreachableBlocks.size() == 1) {
- UnreachableBlock = UnreachableBlocks.front();
- } else {
- UnreachableBlock = BasicBlock::Create(F.getContext(),
- "UnifiedUnreachableBlock", &F);
- new UnreachableInst(F.getContext(), UnreachableBlock);
-
- for (BasicBlock *BB : UnreachableBlocks) {
- BB->getInstList().pop_back(); // Remove the unreachable inst.
- BranchInst::Create(UnreachableBlock, BB);
- }
- }
-
- // Now handle return blocks.
- if (ReturningBlocks.empty()) {
- ReturnBlock = nullptr;
- return false; // No blocks return
- } else if (ReturningBlocks.size() == 1) {
- ReturnBlock = ReturningBlocks.front(); // Already has a single return block
- return false;
- }
-
- // Otherwise, we need to insert a new basic block into the function, add a PHI
- // nodes (if the function returns values), and convert all of the return
- // instructions into unconditional branches.
- //
- BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
- "UnifiedReturnBlock", &F);
-
- PHINode *PN = nullptr;
- if (F.getReturnType()->isVoidTy()) {
- ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
- } else {
- // If the function doesn't return void... add a PHI node to the block...
- PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
- "UnifiedRetVal");
- NewRetBlock->getInstList().push_back(PN);
- ReturnInst::Create(F.getContext(), PN, NewRetBlock);
- }
-
- // Loop over all of the blocks, replacing the return instruction with an
- // unconditional branch.
- //
- for (BasicBlock *BB : ReturningBlocks) {
- // Add an incoming element to the PHI node for every return instruction that
- // is merging into this new block...
- if (PN)
- PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
-
- BB->getInstList().pop_back(); // Remove the return insn
- BranchInst::Create(NewRetBlock, BB);
- }
- ReturnBlock = NewRetBlock;
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
deleted file mode 100644
index 5272ab6e95d5..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the common initialization infrastructure for the
-// TransformUtils library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Utils.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
-
-using namespace llvm;
-
-/// initializeTransformUtils - Initialize all passes in the TransformUtils
-/// library.
-void llvm::initializeTransformUtils(PassRegistry &Registry) {
- initializeAddDiscriminatorsLegacyPassPass(Registry);
- initializeBreakCriticalEdgesPass(Registry);
- initializeCanonicalizeAliasesLegacyPassPass(Registry);
- initializeInstNamerPass(Registry);
- initializeLCSSAWrapperPassPass(Registry);
- initializeLibCallsShrinkWrapLegacyPassPass(Registry);
- initializeLoopSimplifyPass(Registry);
- initializeLowerInvokeLegacyPassPass(Registry);
- initializeLowerSwitchPass(Registry);
- initializeNameAnonGlobalLegacyPassPass(Registry);
- initializePromoteLegacyPassPass(Registry);
- initializeStripNonLineTableDebugInfoPass(Registry);
- initializeUnifyFunctionExitNodesPass(Registry);
- initializeMetaRenamerPass(Registry);
- initializeStripGCRelocatesPass(Registry);
- initializePredicateInfoPrinterLegacyPassPass(Registry);
-}
-
-/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
-void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
- initializeTransformUtils(*unwrap(R));
-}
-
-void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerSwitchPass());
-}
-
-void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createPromoteMemoryToRegisterPass());
-}
-
-void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createAddDiscriminatorsPass());
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
deleted file mode 100644
index a77bf50fe10b..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ /dev/null
@@ -1,539 +0,0 @@
-#include "llvm/Transforms/Utils/VNCoercion.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "vncoerce"
-namespace llvm {
-namespace VNCoercion {
-
-/// Return true if coerceAvailableValueToLoadType will succeed.
-bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
- const DataLayout &DL) {
- Type *StoredTy = StoredVal->getType();
- if (StoredTy == LoadTy)
- return true;
-
- // If the loaded or stored value is an first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
- StoredTy->isArrayTy())
- return false;
-
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
-
- // The store size must be byte-aligned to support future type casts.
- if (llvm::alignTo(StoreSize, 8) != StoreSize)
- return false;
-
- // The store has to be at least as big as the load.
- if (StoreSize < DL.getTypeSizeInBits(LoadTy))
- return false;
-
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- // As a special case, allow coercion of memset used to initialize
- // an array w/null. Despite non-integral pointers not generally having a
- // specific bit pattern, we do assume null is zero.
- if (auto *CI = dyn_cast<Constant>(StoredVal))
- return CI->isNullValue();
- return false;
- }
-
- return true;
-}
-
-template <class T, class HelperClass>
-static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
- HelperClass &Helper,
- const DataLayout &DL) {
- assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
- "precondition violation - materialization can't fail");
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- // If this is already the right type, just return it.
- Type *StoredValTy = StoredVal->getType();
-
- uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
- uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
-
- // If the store and reload are the same size, we can always reuse it.
- if (StoredValSize == LoadedValSize) {
- // Pointer to Pointer -> use bitcast.
- if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
- StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
- } else {
- // Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->isPtrOrPtrVectorTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->isPtrOrPtrVectorTy())
- TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
-
- if (StoredValTy != TypeToCastTo)
- StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
-
- // Cast to pointer if the load needs a pointer type.
- if (LoadedTy->isPtrOrPtrVectorTy())
- StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
- }
-
- if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
- }
- // If the loaded value is smaller than the available value, then we can
- // extract out a piece from it. If the available value is too small, then we
- // can't do anything.
- assert(StoredValSize >= LoadedValSize &&
- "canCoerceMustAliasedValueToLoad fail");
-
- // Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->isPtrOrPtrVectorTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- // Convert vectors and fp to integer, which can be manipulated.
- if (!StoredValTy->isIntegerTy()) {
- StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
- StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
- }
-
- // If this is a big-endian system, we need to shift the value down to the low
- // bits so that a truncate will work.
- if (DL.isBigEndian()) {
- uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
- DL.getTypeStoreSizeInBits(LoadedTy);
- StoredVal = Helper.CreateLShr(
- StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
- }
-
- // Truncate the integer to the right size now.
- Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
- StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
-
- if (LoadedTy != NewIntTy) {
- // If the result is a pointer, inttoptr.
- if (LoadedTy->isPtrOrPtrVectorTy())
- StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
- else
- // Otherwise, bitcast.
- StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
- }
-
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
-}
-
-/// If we saw a store of a value to memory, and
-/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value. LoadedTy is the type of the load we want to replace.
-/// IRB is IRBuilder used to insert new instructions.
-///
-/// If we can't do it, return null.
-Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilder<> &IRB, const DataLayout &DL) {
- return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
-}
-
-/// This function is called when we have a memdep query of a load that ends up
-/// being a clobbering memory write (store, memset, memcpy, memmove). This
-/// means that the write *may* provide bits used by the load but we can't be
-/// sure because the pointers don't must-alias.
-///
-/// Check this case to see if there is anything more we can do before we give
-/// up. This returns -1 if we have to give up, or a byte number in the stored
-/// value of the piece that feeds the load.
-static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
- Value *WritePtr,
- uint64_t WriteSizeInBits,
- const DataLayout &DL) {
- // If the loaded or stored value is a first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy())
- return -1;
-
- int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase =
- GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
- if (StoreBase != LoadBase)
- return -1;
-
- // If the load and store are to the exact same address, they should have been
- // a must alias. AA must have gotten confused.
- // FIXME: Study to see if/when this happens. One case is forwarding a memset
- // to a load from the base of the memset.
-
- // If the load and store don't overlap at all, the store doesn't provide
- // anything to the load. In this case, they really don't alias at all, AA
- // must have gotten confused.
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
-
- if ((WriteSizeInBits & 7) | (LoadSize & 7))
- return -1;
- uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
- LoadSize /= 8;
-
- bool isAAFailure = false;
- if (StoreOffset < LoadOffset)
- isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
- else
- isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
-
- if (isAAFailure)
- return -1;
-
- // If the Load isn't completely contained within the stored bits, we don't
- // have all the bits to feed it. We could do something crazy in the future
- // (issue a smaller load then merge the bits in) but this seems unlikely to be
- // valuable.
- if (StoreOffset > LoadOffset ||
- StoreOffset + StoreSize < LoadOffset + LoadSize)
- return -1;
-
- // Okay, we can do this transformation. Return the number of bytes into the
- // store that the load is.
- return LoadOffset - StoreOffset;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.
-int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI, const DataLayout &DL) {
- auto *StoredVal = DepSI->getValueOperand();
-
- // Cannot handle reading from store of first-class aggregate yet.
- if (StoredVal->getType()->isStructTy() ||
- StoredVal->getType()->isArrayTy())
- return -1;
-
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- // Allow casts of zero values to null as a special case
- auto *CI = dyn_cast<Constant>(StoredVal);
- if (!CI || !CI->isNullValue())
- return -1;
- }
-
- Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize =
- DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
- DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being clobbered by another load. See if
-/// the other load can feed into the second load.
-int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
- const DataLayout &DL) {
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
- return -1;
-
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
- DL.isNonIntegralPointerType(LoadTy->getScalarType()))
- return -1;
-
- Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
- int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
- if (R != -1)
- return R;
-
- // If we have a load/load clobber an DepLI can be widened to cover this load,
- // then we should widen it!
- int64_t LoadOffs = 0;
- const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- LoadBase, LoadOffs, LoadSize, DepLI);
- if (Size == 0)
- return -1;
-
- // Check non-obvious conditions enforced by MDA which we rely on for being
- // able to materialize this potentially available value
- assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
- assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
-
- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
-}
-
-int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
- MemIntrinsic *MI, const DataLayout &DL) {
- // If the mem operation is a non-constant size, we can't handle it.
- ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (!SizeCst)
- return -1;
- uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
-
- // If this is memset, we just need to see if the offset is valid in the size
- // of the memset..
- if (MI->getIntrinsicID() == Intrinsic::memset) {
- if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
- if (!CI || !CI->isZero())
- return -1;
- }
- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, DL);
- }
-
- // If we have a memcpy/memmove, the only case we can handle is if this is a
- // copy from constant memory. In that case, we can read directly from the
- // constant memory.
- MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
- Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (!Src)
- return -1;
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
- return -1;
-
- // See if the access is within the bounds of the transfer.
- int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, DL);
- if (Offset == -1)
- return Offset;
-
- // Don't coerce non-integral pointers to integers or vice versa, and the
- // memtransfer is implicitly a raw byte code
- if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
- // TODO: Can allow nullptrs from constant zeros
- return -1;
-
- unsigned AS = Src->getType()->getPointerAddressSpace();
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src =
- ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
- return Offset;
- return -1;
-}
-
-template <class T, class HelperClass>
-static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
- HelperClass &Helper,
- const DataLayout &DL) {
- LLVMContext &Ctx = SrcVal->getType()->getContext();
-
- // If two pointers are in the same address space, they have the same size,
- // so we don't need to do any truncation, etc. This avoids introducing
- // ptrtoint instructions for pointers that may be non-integral.
- if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
- cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
- cast<PointerType>(LoadTy)->getAddressSpace()) {
- return SrcVal;
- }
-
- uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
- // Compute which bits of the stored value are being used by the load. Convert
- // to an integer type to start with.
- if (SrcVal->getType()->isPtrOrPtrVectorTy())
- SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
- if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
-
- // Shift the bits to the least significant depending on endianness.
- unsigned ShiftAmt;
- if (DL.isLittleEndian())
- ShiftAmt = Offset * 8;
- else
- ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
- if (ShiftAmt)
- SrcVal = Helper.CreateLShr(SrcVal,
- ConstantInt::get(SrcVal->getType(), ShiftAmt));
-
- if (LoadSize != StoreSize)
- SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
- IntegerType::get(Ctx, LoadSize * 8));
- return SrcVal;
-}
-
-/// This function is called when we have a memdep query of a load that ends up
-/// being a clobbering store. This means that the store provides bits used by
-/// the load but the pointers don't must-alias. Check this case to see if
-/// there is anything more we can do before we give up.
-Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL) {
-
- IRBuilder<> Builder(InsertPt);
- SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
- return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
-}
-
-Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL) {
- ConstantFolder F;
- SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
- return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
-}
-
-/// This function is called when we have a memdep query of a load that ends up
-/// being a clobbering load. This means that the load *may* provide bits used
-/// by the load but we can't be sure because the pointers don't must-alias.
-/// Check this case to see if there is anything more we can do before we give
-/// up.
-Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL) {
- // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
- // widen SrcVal out to a larger load.
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- if (Offset + LoadSize > SrcValStoreSize) {
- assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
- assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
- // If we have a load/load clobber an DepLI can be widened to cover this
- // load, then we should widen it to the next power of 2 size big enough!
- unsigned NewLoadSize = Offset + LoadSize;
- if (!isPowerOf2_32(NewLoadSize))
- NewLoadSize = NextPowerOf2(NewLoadSize);
-
- Value *PtrVal = SrcVal->getPointerOperand();
- // Insert the new load after the old load. This ensures that subsequent
- // memdep queries will find the new load. We can't easily remove the old
- // load completely because it is already in the value numbering table.
- IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
- Type *DestPTy =
- PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
- Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
- PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
- NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlignment());
-
- LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
- LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
- // Replace uses of the original load with the wider load. On a big endian
- // system, we need to shift down to get the relevant bits.
- Value *RV = NewLoad;
- if (DL.isBigEndian())
- RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
- RV = Builder.CreateTrunc(RV, SrcVal->getType());
- SrcVal->replaceAllUsesWith(RV);
-
- SrcVal = NewLoad;
- }
-
- return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
-}
-
-Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL) {
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- if (Offset + LoadSize > SrcValStoreSize)
- return nullptr;
- return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
-}
-
-template <class T, class HelperClass>
-T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, HelperClass &Helper,
- const DataLayout &DL) {
- LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
-
- // We know that this method is only called when the mem transfer fully
- // provides the bits for the load.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
- // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
- // independently of what the offset is.
- T *Val = cast<T>(MSI->getValue());
- if (LoadSize != 1)
- Val =
- Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
- T *OneElt = Val;
-
- // Splat the value out to the right number of bits.
- for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
- // If we can double the number of bytes set, do it.
- if (NumBytesSet * 2 <= LoadSize) {
- T *ShVal = Helper.CreateShl(
- Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
- Val = Helper.CreateOr(Val, ShVal);
- NumBytesSet <<= 1;
- continue;
- }
-
- // Otherwise insert one byte at a time.
- T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
- Val = Helper.CreateOr(OneElt, ShVal);
- ++NumBytesSet;
- }
-
- return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
- }
-
- // Otherwise, this is a memcpy/memmove from a constant global.
- MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
- Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
-
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src =
- ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering mem intrinsic.
-Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- const DataLayout &DL) {
- IRBuilder<> Builder(InsertPt);
- return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset,
- LoadTy, Builder, DL);
-}
-
-Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, const DataLayout &DL) {
- // The only case analyzeLoadFromClobberingMemInst cannot be converted to a
- // constant is when it's a memset of a non-constant.
- if (auto *MSI = dyn_cast<MemSetInst>(SrcInst))
- if (!isa<Constant>(MSI->getValue()))
- return nullptr;
- ConstantFolder F;
- return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset,
- LoadTy, F, DL);
-}
-} // namespace VNCoercion
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
deleted file mode 100644
index fbc3407c301f..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ /dev/null
@@ -1,1157 +0,0 @@
-//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MapValue function, which is shared by various parts of
-// the lib/Transforms/Utils library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include <cassert>
-#include <limits>
-#include <memory>
-#include <utility>
-
-using namespace llvm;
-
-// Out of line method to get vtable etc for class.
-void ValueMapTypeRemapper::anchor() {}
-void ValueMaterializer::anchor() {}
-
-namespace {
-
-/// A basic block used in a BlockAddress whose function body is not yet
-/// materialized.
-struct DelayedBasicBlock {
- BasicBlock *OldBB;
- std::unique_ptr<BasicBlock> TempBB;
-
- DelayedBasicBlock(const BlockAddress &Old)
- : OldBB(Old.getBasicBlock()),
- TempBB(BasicBlock::Create(Old.getContext())) {}
-};
-
-struct WorklistEntry {
- enum EntryKind {
- MapGlobalInit,
- MapAppendingVar,
- MapGlobalAliasee,
- RemapFunction
- };
- struct GVInitTy {
- GlobalVariable *GV;
- Constant *Init;
- };
- struct AppendingGVTy {
- GlobalVariable *GV;
- Constant *InitPrefix;
- };
- struct GlobalAliaseeTy {
- GlobalAlias *GA;
- Constant *Aliasee;
- };
-
- unsigned Kind : 2;
- unsigned MCID : 29;
- unsigned AppendingGVIsOldCtorDtor : 1;
- unsigned AppendingGVNumNewMembers;
- union {
- GVInitTy GVInit;
- AppendingGVTy AppendingGV;
- GlobalAliaseeTy GlobalAliasee;
- Function *RemapF;
- } Data;
-};
-
-struct MappingContext {
- ValueToValueMapTy *VM;
- ValueMaterializer *Materializer = nullptr;
-
- /// Construct a MappingContext with a value map and materializer.
- explicit MappingContext(ValueToValueMapTy &VM,
- ValueMaterializer *Materializer = nullptr)
- : VM(&VM), Materializer(Materializer) {}
-};
-
-class Mapper {
- friend class MDNodeMapper;
-
-#ifndef NDEBUG
- DenseSet<GlobalValue *> AlreadyScheduled;
-#endif
-
- RemapFlags Flags;
- ValueMapTypeRemapper *TypeMapper;
- unsigned CurrentMCID = 0;
- SmallVector<MappingContext, 2> MCs;
- SmallVector<WorklistEntry, 4> Worklist;
- SmallVector<DelayedBasicBlock, 1> DelayedBBs;
- SmallVector<Constant *, 16> AppendingInits;
-
-public:
- Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
- : Flags(Flags), TypeMapper(TypeMapper),
- MCs(1, MappingContext(VM, Materializer)) {}
-
- /// ValueMapper should explicitly call \a flush() before destruction.
- ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
-
- bool hasWorkToDo() const { return !Worklist.empty(); }
-
- unsigned
- registerAlternateMappingContext(ValueToValueMapTy &VM,
- ValueMaterializer *Materializer = nullptr) {
- MCs.push_back(MappingContext(VM, Materializer));
- return MCs.size() - 1;
- }
-
- void addFlags(RemapFlags Flags);
-
- void remapGlobalObjectMetadata(GlobalObject &GO);
-
- Value *mapValue(const Value *V);
- void remapInstruction(Instruction *I);
- void remapFunction(Function &F);
-
- Constant *mapConstant(const Constant *C) {
- return cast_or_null<Constant>(mapValue(C));
- }
-
- /// Map metadata.
- ///
- /// Find the mapping for MD. Guarantees that the return will be resolved
- /// (not an MDNode, or MDNode::isResolved() returns true).
- Metadata *mapMetadata(const Metadata *MD);
-
- void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
- unsigned MCID);
- void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
- bool IsOldCtorDtor,
- ArrayRef<Constant *> NewMembers,
- unsigned MCID);
- void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID);
- void scheduleRemapFunction(Function &F, unsigned MCID);
-
- void flush();
-
-private:
- void mapGlobalInitializer(GlobalVariable &GV, Constant &Init);
- void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
- bool IsOldCtorDtor,
- ArrayRef<Constant *> NewMembers);
- void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee);
- void remapFunction(Function &F, ValueToValueMapTy &VM);
-
- ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; }
- ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; }
-
- Value *mapBlockAddress(const BlockAddress &BA);
-
- /// Map metadata that doesn't require visiting operands.
- Optional<Metadata *> mapSimpleMetadata(const Metadata *MD);
-
- Metadata *mapToMetadata(const Metadata *Key, Metadata *Val);
- Metadata *mapToSelf(const Metadata *MD);
-};
-
-class MDNodeMapper {
- Mapper &M;
-
- /// Data about a node in \a UniquedGraph.
- struct Data {
- bool HasChanged = false;
- unsigned ID = std::numeric_limits<unsigned>::max();
- TempMDNode Placeholder;
- };
-
- /// A graph of uniqued nodes.
- struct UniquedGraph {
- SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties.
- SmallVector<MDNode *, 16> POT; // Post-order traversal.
-
- /// Propagate changed operands through the post-order traversal.
- ///
- /// Iteratively update \a Data::HasChanged for each node based on \a
- /// Data::HasChanged of its operands, until fixed point.
- void propagateChanges();
-
- /// Get a forward reference to a node to use as an operand.
- Metadata &getFwdReference(MDNode &Op);
- };
-
- /// Worklist of distinct nodes whose operands need to be remapped.
- SmallVector<MDNode *, 16> DistinctWorklist;
-
- // Storage for a UniquedGraph.
- SmallDenseMap<const Metadata *, Data, 32> InfoStorage;
- SmallVector<MDNode *, 16> POTStorage;
-
-public:
- MDNodeMapper(Mapper &M) : M(M) {}
-
- /// Map a metadata node (and its transitive operands).
- ///
- /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative
- /// algorithm handles distinct nodes and uniqued node subgraphs using
- /// different strategies.
- ///
- /// Distinct nodes are immediately mapped and added to \a DistinctWorklist
- /// using \a mapDistinctNode(). Their mapping can always be computed
- /// immediately without visiting operands, even if their operands change.
- ///
- /// The mapping for uniqued nodes depends on whether their operands change.
- /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of
- /// a node to calculate uniqued node mappings in bulk. Distinct leafs are
- /// added to \a DistinctWorklist with \a mapDistinctNode().
- ///
- /// After mapping \c N itself, this function remaps the operands of the
- /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c
- /// N has been mapped.
- Metadata *map(const MDNode &N);
-
-private:
- /// Map a top-level uniqued node and the uniqued subgraph underneath it.
- ///
- /// This builds up a post-order traversal of the (unmapped) uniqued subgraph
- /// underneath \c FirstN and calculates the nodes' mapping. Each node uses
- /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its
- /// operands uses the identity mapping.
- ///
- /// The algorithm works as follows:
- ///
- /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and
- /// save the post-order traversal in the given \a UniquedGraph, tracking
- /// nodes' operands change.
- ///
- /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands
- /// through the \a UniquedGraph until fixed point, following the rule
- /// that if a node changes, any node that references must also change.
- ///
- /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes
- /// (referencing new operands) where necessary.
- Metadata *mapTopLevelUniquedNode(const MDNode &FirstN);
-
- /// Try to map the operand of an \a MDNode.
- ///
- /// If \c Op is already mapped, return the mapping. If it's not an \a
- /// MDNode, compute and return the mapping. If it's a distinct \a MDNode,
- /// return the result of \a mapDistinctNode().
- ///
- /// \return None if \c Op is an unmapped uniqued \a MDNode.
- /// \post getMappedOp(Op) only returns None if this returns None.
- Optional<Metadata *> tryToMapOperand(const Metadata *Op);
-
- /// Map a distinct node.
- ///
- /// Return the mapping for the distinct node \c N, saving the result in \a
- /// DistinctWorklist for later remapping.
- ///
- /// \pre \c N is not yet mapped.
- /// \pre \c N.isDistinct().
- MDNode *mapDistinctNode(const MDNode &N);
-
- /// Get a previously mapped node.
- Optional<Metadata *> getMappedOp(const Metadata *Op) const;
-
- /// Create a post-order traversal of an unmapped uniqued node subgraph.
- ///
- /// This traverses the metadata graph deeply enough to map \c FirstN. It
- /// uses \a tryToMapOperand() (via \a Mapper::mapSimplifiedNode()), so any
- /// metadata that has already been mapped will not be part of the POT.
- ///
- /// Each node that has a changed operand from outside the graph (e.g., a
- /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata)
- /// is marked with \a Data::HasChanged.
- ///
- /// \return \c true if any nodes in \c G have \a Data::HasChanged.
- /// \post \c G.POT is a post-order traversal ending with \c FirstN.
- /// \post \a Data::hasChanged in \c G.Info indicates whether any node needs
- /// to change because of operands outside the graph.
- bool createPOT(UniquedGraph &G, const MDNode &FirstN);
-
- /// Visit the operands of a uniqued node in the POT.
- ///
- /// Visit the operands in the range from \c I to \c E, returning the first
- /// uniqued node we find that isn't yet in \c G. \c I is always advanced to
- /// where to continue the loop through the operands.
- ///
- /// This sets \c HasChanged if any of the visited operands change.
- MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
- MDNode::op_iterator E, bool &HasChanged);
-
- /// Map all the nodes in the given uniqued graph.
- ///
- /// This visits all the nodes in \c G in post-order, using the identity
- /// mapping or creating a new node depending on \a Data::HasChanged.
- ///
- /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of
- /// their operands outside of \c G.
- /// \pre \a Data::HasChanged is true for a node in \c G iff any of its
- /// operands have changed.
- /// \post \a getMappedOp() returns the mapped node for every node in \c G.
- void mapNodesInPOT(UniquedGraph &G);
-
- /// Remap a node's operands using the given functor.
- ///
- /// Iterate through the operands of \c N and update them in place using \c
- /// mapOperand.
- ///
- /// \pre N.isDistinct() or N.isTemporary().
- template <class OperandMapper>
- void remapOperands(MDNode &N, OperandMapper mapOperand);
-};
-
-} // end anonymous namespace
-
-Value *Mapper::mapValue(const Value *V) {
- ValueToValueMapTy::iterator I = getVM().find(V);
-
- // If the value already exists in the map, use it.
- if (I != getVM().end()) {
- assert(I->second && "Unexpected null mapping");
- return I->second;
- }
-
- // If we have a materializer and it can materialize a value, use that.
- if (auto *Materializer = getMaterializer()) {
- if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) {
- getVM()[V] = NewV;
- return NewV;
- }
- }
-
- // Global values do not need to be seeded into the VM if they
- // are using the identity mapping.
- if (isa<GlobalValue>(V)) {
- if (Flags & RF_NullMapMissingGlobalValues)
- return nullptr;
- return getVM()[V] = const_cast<Value *>(V);
- }
-
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
- // Inline asm may need *type* remapping.
- FunctionType *NewTy = IA->getFunctionType();
- if (TypeMapper) {
- NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
-
- if (NewTy != IA->getFunctionType())
- V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
- IA->hasSideEffects(), IA->isAlignStack());
- }
-
- return getVM()[V] = const_cast<Value *>(V);
- }
-
- if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) {
- const Metadata *MD = MDV->getMetadata();
-
- if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) {
- // Look through to grab the local value.
- if (Value *LV = mapValue(LAM->getValue())) {
- if (V == LAM->getValue())
- return const_cast<Value *>(V);
- return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV));
- }
-
- // FIXME: always return nullptr once Verifier::verifyDominatesUse()
- // ensures metadata operands only reference defined SSA values.
- return (Flags & RF_IgnoreMissingLocals)
- ? nullptr
- : MetadataAsValue::get(V->getContext(),
- MDTuple::get(V->getContext(), None));
- }
-
- // If this is a module-level metadata and we know that nothing at the module
- // level is changing, then use an identity mapping.
- if (Flags & RF_NoModuleLevelChanges)
- return getVM()[V] = const_cast<Value *>(V);
-
- // Map the metadata and turn it into a value.
- auto *MappedMD = mapMetadata(MD);
- if (MD == MappedMD)
- return getVM()[V] = const_cast<Value *>(V);
- return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD);
- }
-
- // Okay, this either must be a constant (which may or may not be mappable) or
- // is something that is not in the mapping table.
- Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (!C)
- return nullptr;
-
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
- return mapBlockAddress(*BA);
-
- auto mapValueOrNull = [this](Value *V) {
- auto Mapped = mapValue(V);
- assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
- "Unexpected null mapping for constant operand without "
- "NullMapMissingGlobalValues flag");
- return Mapped;
- };
-
- // Otherwise, we have some other constant to remap. Start by checking to see
- // if all operands have an identity remapping.
- unsigned OpNo = 0, NumOperands = C->getNumOperands();
- Value *Mapped = nullptr;
- for (; OpNo != NumOperands; ++OpNo) {
- Value *Op = C->getOperand(OpNo);
- Mapped = mapValueOrNull(Op);
- if (!Mapped)
- return nullptr;
- if (Mapped != Op)
- break;
- }
-
- // See if the type mapper wants to remap the type as well.
- Type *NewTy = C->getType();
- if (TypeMapper)
- NewTy = TypeMapper->remapType(NewTy);
-
- // If the result type and all operands match up, then just insert an identity
- // mapping.
- if (OpNo == NumOperands && NewTy == C->getType())
- return getVM()[V] = C;
-
- // Okay, we need to create a new constant. We've already processed some or
- // all of the operands, set them all up now.
- SmallVector<Constant*, 8> Ops;
- Ops.reserve(NumOperands);
- for (unsigned j = 0; j != OpNo; ++j)
- Ops.push_back(cast<Constant>(C->getOperand(j)));
-
- // If one of the operands mismatch, push it and the other mapped operands.
- if (OpNo != NumOperands) {
- Ops.push_back(cast<Constant>(Mapped));
-
- // Map the rest of the operands that aren't processed yet.
- for (++OpNo; OpNo != NumOperands; ++OpNo) {
- Mapped = mapValueOrNull(C->getOperand(OpNo));
- if (!Mapped)
- return nullptr;
- Ops.push_back(cast<Constant>(Mapped));
- }
- }
- Type *NewSrcTy = nullptr;
- if (TypeMapper)
- if (auto *GEPO = dyn_cast<GEPOperator>(C))
- NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType());
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy);
- if (isa<ConstantArray>(C))
- return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
- if (isa<ConstantStruct>(C))
- return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
- if (isa<ConstantVector>(C))
- return getVM()[V] = ConstantVector::get(Ops);
- // If this is a no-operand constant, it must be because the type was remapped.
- if (isa<UndefValue>(C))
- return getVM()[V] = UndefValue::get(NewTy);
- if (isa<ConstantAggregateZero>(C))
- return getVM()[V] = ConstantAggregateZero::get(NewTy);
- assert(isa<ConstantPointerNull>(C));
- return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
-}
-
-Value *Mapper::mapBlockAddress(const BlockAddress &BA) {
- Function *F = cast<Function>(mapValue(BA.getFunction()));
-
- // F may not have materialized its initializer. In that case, create a
- // dummy basic block for now, and replace it once we've materialized all
- // the initializers.
- BasicBlock *BB;
- if (F->empty()) {
- DelayedBBs.push_back(DelayedBasicBlock(BA));
- BB = DelayedBBs.back().TempBB.get();
- } else {
- BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock()));
- }
-
- return getVM()[&BA] = BlockAddress::get(F, BB ? BB : BA.getBasicBlock());
-}
-
-Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) {
- getVM().MD()[Key].reset(Val);
- return Val;
-}
-
-Metadata *Mapper::mapToSelf(const Metadata *MD) {
- return mapToMetadata(MD, const_cast<Metadata *>(MD));
-}
-
-Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
- if (!Op)
- return nullptr;
-
- if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) {
-#ifndef NDEBUG
- if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
- assert((!*MappedOp || M.getVM().count(CMD->getValue()) ||
- M.getVM().getMappedMD(Op)) &&
- "Expected Value to be memoized");
- else
- assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) &&
- "Expected result to be memoized");
-#endif
- return *MappedOp;
- }
-
- const MDNode &N = *cast<MDNode>(Op);
- if (N.isDistinct())
- return mapDistinctNode(N);
- return None;
-}
-
-static Metadata *cloneOrBuildODR(const MDNode &N) {
- auto *CT = dyn_cast<DICompositeType>(&N);
- // If ODR type uniquing is enabled, we would have uniqued composite types
- // with identifiers during bitcode reading, so we can just use CT.
- if (CT && CT->getContext().isODRUniquingDebugTypes() &&
- CT->getIdentifier() != "")
- return const_cast<DICompositeType *>(CT);
- return MDNode::replaceWithDistinct(N.clone());
-}
-
-MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
- assert(N.isDistinct() && "Expected a distinct node");
- assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
- DistinctWorklist.push_back(
- cast<MDNode>((M.Flags & RF_MoveDistinctMDs)
- ? M.mapToSelf(&N)
- : M.mapToMetadata(&N, cloneOrBuildODR(N))));
- return DistinctWorklist.back();
-}
-
-static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD,
- Value *MappedV) {
- if (CMD.getValue() == MappedV)
- return const_cast<ConstantAsMetadata *>(&CMD);
- return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr;
-}
-
-Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const {
- if (!Op)
- return nullptr;
-
- if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op))
- return *MappedOp;
-
- if (isa<MDString>(Op))
- return const_cast<Metadata *>(Op);
-
- if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op))
- return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue()));
-
- return None;
-}
-
-Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) {
- auto Where = Info.find(&Op);
- assert(Where != Info.end() && "Expected a valid reference");
-
- auto &OpD = Where->second;
- if (!OpD.HasChanged)
- return Op;
-
- // Lazily construct a temporary node.
- if (!OpD.Placeholder)
- OpD.Placeholder = Op.clone();
-
- return *OpD.Placeholder;
-}
-
-template <class OperandMapper>
-void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
- assert(!N.isUniqued() && "Expected distinct or temporary nodes");
- for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
- Metadata *Old = N.getOperand(I);
- Metadata *New = mapOperand(Old);
-
- if (Old != New)
- N.replaceOperandWith(I, New);
- }
-}
-
-namespace {
-
-/// An entry in the worklist for the post-order traversal.
-struct POTWorklistEntry {
- MDNode *N; ///< Current node.
- MDNode::op_iterator Op; ///< Current operand of \c N.
-
- /// Keep a flag of whether operands have changed in the worklist to avoid
- /// hitting the map in \a UniquedGraph.
- bool HasChanged = false;
-
- POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
-};
-
-} // end anonymous namespace
-
-bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
- assert(G.Info.empty() && "Expected a fresh traversal");
- assert(FirstN.isUniqued() && "Expected uniqued node in POT");
-
- // Construct a post-order traversal of the uniqued subgraph under FirstN.
- bool AnyChanges = false;
- SmallVector<POTWorklistEntry, 16> Worklist;
- Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN)));
- (void)G.Info[&FirstN];
- while (!Worklist.empty()) {
- // Start or continue the traversal through the this node's operands.
- auto &WE = Worklist.back();
- if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) {
- // Push a new node to traverse first.
- Worklist.push_back(POTWorklistEntry(*N));
- continue;
- }
-
- // Push the node onto the POT.
- assert(WE.N->isUniqued() && "Expected only uniqued nodes");
- assert(WE.Op == WE.N->op_end() && "Expected to visit all operands");
- auto &D = G.Info[WE.N];
- AnyChanges |= D.HasChanged = WE.HasChanged;
- D.ID = G.POT.size();
- G.POT.push_back(WE.N);
-
- // Pop the node off the worklist.
- Worklist.pop_back();
- }
- return AnyChanges;
-}
-
-MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I,
- MDNode::op_iterator E, bool &HasChanged) {
- while (I != E) {
- Metadata *Op = *I++; // Increment even on early return.
- if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) {
- // Check if the operand changes.
- HasChanged |= Op != *MappedOp;
- continue;
- }
-
- // A uniqued metadata node.
- MDNode &OpN = *cast<MDNode>(Op);
- assert(OpN.isUniqued() &&
- "Only uniqued operands cannot be mapped immediately");
- if (G.Info.insert(std::make_pair(&OpN, Data())).second)
- return &OpN; // This is a new one. Return it.
- }
- return nullptr;
-}
-
-void MDNodeMapper::UniquedGraph::propagateChanges() {
- bool AnyChanges;
- do {
- AnyChanges = false;
- for (MDNode *N : POT) {
- auto &D = Info[N];
- if (D.HasChanged)
- continue;
-
- if (llvm::none_of(N->operands(), [&](const Metadata *Op) {
- auto Where = Info.find(Op);
- return Where != Info.end() && Where->second.HasChanged;
- }))
- continue;
-
- AnyChanges = D.HasChanged = true;
- }
- } while (AnyChanges);
-}
-
-void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
- // Construct uniqued nodes, building forward references as necessary.
- SmallVector<MDNode *, 16> CyclicNodes;
- for (auto *N : G.POT) {
- auto &D = G.Info[N];
- if (!D.HasChanged) {
- // The node hasn't changed.
- M.mapToSelf(N);
- continue;
- }
-
- // Remember whether this node had a placeholder.
- bool HadPlaceholder(D.Placeholder);
-
- // Clone the uniqued node and remap the operands.
- TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone();
- remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
- if (Optional<Metadata *> MappedOp = getMappedOp(Old))
- return *MappedOp;
- (void)D;
- assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
- return &G.getFwdReference(*cast<MDNode>(Old));
- });
-
- auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN));
- M.mapToMetadata(N, NewN);
-
- // Nodes that were referenced out of order in the POT are involved in a
- // uniquing cycle.
- if (HadPlaceholder)
- CyclicNodes.push_back(NewN);
- }
-
- // Resolve cycles.
- for (auto *N : CyclicNodes)
- if (!N->isResolved())
- N->resolveCycles();
-}
-
-Metadata *MDNodeMapper::map(const MDNode &N) {
- assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive");
- assert(!(M.Flags & RF_NoModuleLevelChanges) &&
- "MDNodeMapper::map assumes module-level changes");
-
- // Require resolved nodes whenever metadata might be remapped.
- assert(N.isResolved() && "Unexpected unresolved node");
-
- Metadata *MappedN =
- N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N);
- while (!DistinctWorklist.empty())
- remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) {
- if (Optional<Metadata *> MappedOp = tryToMapOperand(Old))
- return *MappedOp;
- return mapTopLevelUniquedNode(*cast<MDNode>(Old));
- });
- return MappedN;
-}
-
-Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) {
- assert(FirstN.isUniqued() && "Expected uniqued node");
-
- // Create a post-order traversal of uniqued nodes under FirstN.
- UniquedGraph G;
- if (!createPOT(G, FirstN)) {
- // Return early if no nodes have changed.
- for (const MDNode *N : G.POT)
- M.mapToSelf(N);
- return &const_cast<MDNode &>(FirstN);
- }
-
- // Update graph with all nodes that have changed.
- G.propagateChanges();
-
- // Map all the nodes in the graph.
- mapNodesInPOT(G);
-
- // Return the original node, remapped.
- return *getMappedOp(&FirstN);
-}
-
-namespace {
-
-struct MapMetadataDisabler {
- ValueToValueMapTy &VM;
-
- MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
- VM.disableMapMetadata();
- }
-
- ~MapMetadataDisabler() { VM.enableMapMetadata(); }
-};
-
-} // end anonymous namespace
-
-Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
- // If the value already exists in the map, use it.
- if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD))
- return *NewMD;
-
- if (isa<MDString>(MD))
- return const_cast<Metadata *>(MD);
-
- // This is a module-level metadata. If nothing at the module level is
- // changing, use an identity mapping.
- if ((Flags & RF_NoModuleLevelChanges))
- return const_cast<Metadata *>(MD);
-
- if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) {
- // Disallow recursion into metadata mapping through mapValue.
- MapMetadataDisabler MMD(getVM());
-
- // Don't memoize ConstantAsMetadata. Instead of lasting until the
- // LLVMContext is destroyed, they can be deleted when the GlobalValue they
- // reference is destructed. These aren't super common, so the extra
- // indirection isn't that expensive.
- return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
- }
-
- assert(isa<MDNode>(MD) && "Expected a metadata node");
-
- return None;
-}
-
-Metadata *Mapper::mapMetadata(const Metadata *MD) {
- assert(MD && "Expected valid metadata");
- assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata");
-
- if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD))
- return *NewMD;
-
- return MDNodeMapper(*this).map(*cast<MDNode>(MD));
-}
-
-void Mapper::flush() {
- // Flush out the worklist of global values.
- while (!Worklist.empty()) {
- WorklistEntry E = Worklist.pop_back_val();
- CurrentMCID = E.MCID;
- switch (E.Kind) {
- case WorklistEntry::MapGlobalInit:
- E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init));
- remapGlobalObjectMetadata(*E.Data.GVInit.GV);
- break;
- case WorklistEntry::MapAppendingVar: {
- unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers;
- mapAppendingVariable(*E.Data.AppendingGV.GV,
- E.Data.AppendingGV.InitPrefix,
- E.AppendingGVIsOldCtorDtor,
- makeArrayRef(AppendingInits).slice(PrefixSize));
- AppendingInits.resize(PrefixSize);
- break;
- }
- case WorklistEntry::MapGlobalAliasee:
- E.Data.GlobalAliasee.GA->setAliasee(
- mapConstant(E.Data.GlobalAliasee.Aliasee));
- break;
- case WorklistEntry::RemapFunction:
- remapFunction(*E.Data.RemapF);
- break;
- }
- }
- CurrentMCID = 0;
-
- // Finish logic for block addresses now that all global values have been
- // handled.
- while (!DelayedBBs.empty()) {
- DelayedBasicBlock DBB = DelayedBBs.pop_back_val();
- BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB));
- DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB);
- }
-}
-
-void Mapper::remapInstruction(Instruction *I) {
- // Remap operands.
- for (Use &Op : I->operands()) {
- Value *V = mapValue(Op);
- // If we aren't ignoring missing entries, assert that something happened.
- if (V)
- Op = V;
- else
- assert((Flags & RF_IgnoreMissingLocals) &&
- "Referenced value not in value map!");
- }
-
- // Remap phi nodes' incoming blocks.
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = mapValue(PN->getIncomingBlock(i));
- // If we aren't ignoring missing entries, assert that something happened.
- if (V)
- PN->setIncomingBlock(i, cast<BasicBlock>(V));
- else
- assert((Flags & RF_IgnoreMissingLocals) &&
- "Referenced block not in value map!");
- }
- }
-
- // Remap attached metadata.
- SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadata(MDs);
- for (const auto &MI : MDs) {
- MDNode *Old = MI.second;
- MDNode *New = cast_or_null<MDNode>(mapMetadata(Old));
- if (New != Old)
- I->setMetadata(MI.first, New);
- }
-
- if (!TypeMapper)
- return;
-
- // If the instruction's type is being remapped, do so now.
- if (auto CS = CallSite(I)) {
- SmallVector<Type *, 3> Tys;
- FunctionType *FTy = CS.getFunctionType();
- Tys.reserve(FTy->getNumParams());
- for (Type *Ty : FTy->params())
- Tys.push_back(TypeMapper->remapType(Ty));
- CS.mutateFunctionType(FunctionType::get(
- TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
-
- LLVMContext &C = CS->getContext();
- AttributeList Attrs = CS.getAttributes();
- for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- if (Attrs.hasAttribute(i, Attribute::ByVal)) {
- Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
- if (!Ty)
- continue;
-
- Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
- Attrs = Attrs.addAttribute(
- C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
- }
- }
- CS.setAttributes(Attrs);
- return;
- }
- if (auto *AI = dyn_cast<AllocaInst>(I))
- AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType()));
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
- GEP->setSourceElementType(
- TypeMapper->remapType(GEP->getSourceElementType()));
- GEP->setResultElementType(
- TypeMapper->remapType(GEP->getResultElementType()));
- }
- I->mutateType(TypeMapper->remapType(I->getType()));
-}
-
-void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) {
- SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
- GO.getAllMetadata(MDs);
- GO.clearMetadata();
- for (const auto &I : MDs)
- GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second)));
-}
-
-void Mapper::remapFunction(Function &F) {
- // Remap the operands.
- for (Use &Op : F.operands())
- if (Op)
- Op = mapValue(Op);
-
- // Remap the metadata attachments.
- remapGlobalObjectMetadata(F);
-
- // Remap the argument types.
- if (TypeMapper)
- for (Argument &A : F.args())
- A.mutateType(TypeMapper->remapType(A.getType()));
-
- // Remap the instructions.
- for (BasicBlock &BB : F)
- for (Instruction &I : BB)
- remapInstruction(&I);
-}
-
-void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
- bool IsOldCtorDtor,
- ArrayRef<Constant *> NewMembers) {
- SmallVector<Constant *, 16> Elements;
- if (InitPrefix) {
- unsigned NumElements =
- cast<ArrayType>(InitPrefix->getType())->getNumElements();
- for (unsigned I = 0; I != NumElements; ++I)
- Elements.push_back(InitPrefix->getAggregateElement(I));
- }
-
- PointerType *VoidPtrTy;
- Type *EltTy;
- if (IsOldCtorDtor) {
- // FIXME: This upgrade is done during linking to support the C API. See
- // also IRLinker::linkAppendingVarProto() in IRMover.cpp.
- VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
- auto &ST = *cast<StructType>(NewMembers.front()->getType());
- Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
- EltTy = StructType::get(GV.getContext(), Tys, false);
- }
-
- for (auto *V : NewMembers) {
- Constant *NewV;
- if (IsOldCtorDtor) {
- auto *S = cast<ConstantStruct>(V);
- auto *E1 = cast<Constant>(mapValue(S->getOperand(0)));
- auto *E2 = cast<Constant>(mapValue(S->getOperand(1)));
- Constant *Null = Constant::getNullValue(VoidPtrTy);
- NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null);
- } else {
- NewV = cast_or_null<Constant>(mapValue(V));
- }
- Elements.push_back(NewV);
- }
-
- GV.setInitializer(ConstantArray::get(
- cast<ArrayType>(GV.getType()->getElementType()), Elements));
-}
-
-void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
- unsigned MCID) {
- assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
- assert(MCID < MCs.size() && "Invalid mapping context");
-
- WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalInit;
- WE.MCID = MCID;
- WE.Data.GVInit.GV = &GV;
- WE.Data.GVInit.Init = &Init;
- Worklist.push_back(WE);
-}
-
-void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
- Constant *InitPrefix,
- bool IsOldCtorDtor,
- ArrayRef<Constant *> NewMembers,
- unsigned MCID) {
- assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
- assert(MCID < MCs.size() && "Invalid mapping context");
-
- WorklistEntry WE;
- WE.Kind = WorklistEntry::MapAppendingVar;
- WE.MCID = MCID;
- WE.Data.AppendingGV.GV = &GV;
- WE.Data.AppendingGV.InitPrefix = InitPrefix;
- WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor;
- WE.AppendingGVNumNewMembers = NewMembers.size();
- Worklist.push_back(WE);
- AppendingInits.append(NewMembers.begin(), NewMembers.end());
-}
-
-void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule");
- assert(MCID < MCs.size() && "Invalid mapping context");
-
- WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalAliasee;
- WE.MCID = MCID;
- WE.Data.GlobalAliasee.GA = &GA;
- WE.Data.GlobalAliasee.Aliasee = &Aliasee;
- Worklist.push_back(WE);
-}
-
-void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) {
- assert(AlreadyScheduled.insert(&F).second && "Should not reschedule");
- assert(MCID < MCs.size() && "Invalid mapping context");
-
- WorklistEntry WE;
- WE.Kind = WorklistEntry::RemapFunction;
- WE.MCID = MCID;
- WE.Data.RemapF = &F;
- Worklist.push_back(WE);
-}
-
-void Mapper::addFlags(RemapFlags Flags) {
- assert(!hasWorkToDo() && "Expected to have flushed the worklist");
- this->Flags = this->Flags | Flags;
-}
-
-static Mapper *getAsMapper(void *pImpl) {
- return reinterpret_cast<Mapper *>(pImpl);
-}
-
-namespace {
-
-class FlushingMapper {
- Mapper &M;
-
-public:
- explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
- assert(!M.hasWorkToDo() && "Expected to be flushed");
- }
-
- ~FlushingMapper() { M.flush(); }
-
- Mapper *operator->() const { return &M; }
-};
-
-} // end anonymous namespace
-
-ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer)
- : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
-
-ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
-
-unsigned
-ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM,
- ValueMaterializer *Materializer) {
- return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer);
-}
-
-void ValueMapper::addFlags(RemapFlags Flags) {
- FlushingMapper(pImpl)->addFlags(Flags);
-}
-
-Value *ValueMapper::mapValue(const Value &V) {
- return FlushingMapper(pImpl)->mapValue(&V);
-}
-
-Constant *ValueMapper::mapConstant(const Constant &C) {
- return cast_or_null<Constant>(mapValue(C));
-}
-
-Metadata *ValueMapper::mapMetadata(const Metadata &MD) {
- return FlushingMapper(pImpl)->mapMetadata(&MD);
-}
-
-MDNode *ValueMapper::mapMDNode(const MDNode &N) {
- return cast_or_null<MDNode>(mapMetadata(N));
-}
-
-void ValueMapper::remapInstruction(Instruction &I) {
- FlushingMapper(pImpl)->remapInstruction(&I);
-}
-
-void ValueMapper::remapFunction(Function &F) {
- FlushingMapper(pImpl)->remapFunction(F);
-}
-
-void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
- Constant &Init,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID);
-}
-
-void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
- Constant *InitPrefix,
- bool IsOldCtorDtor,
- ArrayRef<Constant *> NewMembers,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapAppendingVariable(
- GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
-}
-
-void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID);
-}
-
-void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
- getAsMapper(pImpl)->scheduleRemapFunction(F, MCID);
-}