Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
60 files changed, 0 insertions, 41540 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp deleted file mode 100644 index 01912297324a..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). -// -//===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> - -namespace llvm { - -// We sort the stack variables by alignment (largest first) to minimize -// unnecessary large gaps due to alignment. -// It is tempting to also sort variables by size so that larger variables -// have larger redzones at both ends. But reordering will make report analysis -// harder, especially when temporary unnamed variables are present. -// So, until we can provide more information (type, line number, etc) -// for the stack variables we avoid reordering them too much. -static inline bool CompareVars(const ASanStackVariableDescription &a, - const ASanStackVariableDescription &b) { - return a.Alignment > b.Alignment; -} - -// We also force minimal alignment for all vars to kMinAlignment so that vars -// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. -static const size_t kMinAlignment = 16; - -// We want to add a full redzone after every variable. -// The larger the variable Size the larger is the redzone. -// The resulting frame size is a multiple of Alignment. -static size_t VarAndRedzoneSize(size_t Size, size_t Granularity, - size_t Alignment) { - size_t Res = 0; - if (Size <= 4) Res = 16; - else if (Size <= 16) Res = 32; - else if (Size <= 128) Res = Size + 32; - else if (Size <= 512) Res = Size + 64; - else if (Size <= 4096) Res = Size + 128; - else Res = Size + 256; - return alignTo(std::max(Res, 2 * Granularity), Alignment); -} - -ASanStackFrameLayout -ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, - size_t Granularity, size_t MinHeaderSize) { - assert(Granularity >= 8 && Granularity <= 64 && - (Granularity & (Granularity - 1)) == 0); - assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && - MinHeaderSize >= Granularity); - const size_t NumVars = Vars.size(); - assert(NumVars > 0); - for (size_t i = 0; i < NumVars; i++) - Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); - - llvm::stable_sort(Vars, CompareVars); - - ASanStackFrameLayout Layout; - Layout.Granularity = Granularity; - Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment); - size_t Offset = std::max(std::max(MinHeaderSize, Granularity), - Vars[0].Alignment); - assert((Offset % Granularity) == 0); - for (size_t i = 0; i < NumVars; i++) { - bool IsLast = i == NumVars - 1; - size_t Alignment = std::max(Granularity, Vars[i].Alignment); - (void)Alignment; // Used only in asserts. 
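// Aside: a self-contained sketch of the redzone sizing rule above, for
// illustration only. alignUp is a local stand-in for llvm::alignTo; the
// granularity/alignment values in main are assumptions, not LLVM defaults.
#include <algorithm>
#include <cstddef>
#include <cstdio>

static size_t alignUp(size_t X, size_t A) { return (X + A - 1) & ~(A - 1); }

static size_t varAndRedzoneSize(size_t Size, size_t Granularity,
                                size_t Alignment) {
  size_t Res = 0;
  if (Size <= 4)         Res = 16;         // tiny vars get a fixed 16-byte slot
  else if (Size <= 16)   Res = 32;
  else if (Size <= 128)  Res = Size + 32;  // the redzone grows with the var
  else if (Size <= 512)  Res = Size + 64;
  else if (Size <= 4096) Res = Size + 128;
  else                   Res = Size + 256;
  return alignUp(std::max(Res, 2 * Granularity), Alignment);
}

int main() {
  printf("%zu\n", varAndRedzoneSize(4, 8, 16));  // 16
  printf("%zu\n", varAndRedzoneSize(40, 8, 16)); // 80: 40 + 32, rounded up to 16
}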
- size_t Size = Vars[i].Size; - assert((Alignment & (Alignment - 1)) == 0); - assert(Layout.FrameAlignment >= Alignment); - assert((Offset % Alignment) == 0); - assert(Size > 0); - size_t NextAlignment = IsLast ? Granularity - : std::max(Granularity, Vars[i + 1].Alignment); - size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity, - NextAlignment); - Vars[i].Offset = Offset; - Offset += SizeWithRedzone; - } - if (Offset % MinHeaderSize) { - Offset += MinHeaderSize - (Offset % MinHeaderSize); - } - Layout.FrameSize = Offset; - assert((Layout.FrameSize % MinHeaderSize) == 0); - return Layout; -} - -SmallString<64> ComputeASanStackFrameDescription( - const SmallVectorImpl<ASanStackVariableDescription> &Vars) { - SmallString<2048> StackDescriptionStorage; - raw_svector_ostream StackDescription(StackDescriptionStorage); - StackDescription << Vars.size(); - - for (const auto &Var : Vars) { - std::string Name = Var.Name; - if (Var.Line) { - Name += ":"; - Name += to_string(Var.Line); - } - StackDescription << " " << Var.Offset << " " << Var.Size << " " - << Name.size() << " " << Name; - } - return StackDescription.str(); -} - -SmallVector<uint8_t, 64> -GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars, - const ASanStackFrameLayout &Layout) { - assert(Vars.size() > 0); - SmallVector<uint8_t, 64> SB; - SB.clear(); - const size_t Granularity = Layout.Granularity; - SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic); - for (const auto &Var : Vars) { - SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic); - - SB.resize(SB.size() + Var.Size / Granularity, 0); - if (Var.Size % Granularity) - SB.push_back(Var.Size % Granularity); - } - SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic); - return SB; -} - -SmallVector<uint8_t, 64> GetShadowBytesAfterScope( - const SmallVectorImpl<ASanStackVariableDescription> &Vars, - const ASanStackFrameLayout &Layout) { - SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout); - const size_t Granularity = Layout.Granularity; - - for (const auto &Var : Vars) { - assert(Var.LifetimeSize <= Var.Size); - const size_t LifetimeShadowSize = - (Var.LifetimeSize + Granularity - 1) / Granularity; - const size_t Offset = Var.Offset / Granularity; - std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize, - kAsanStackUseAfterScopeMagic); - } - - return SB; -} - -} // llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp deleted file mode 100644 index ee0973002c47..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ /dev/null @@ -1,276 +0,0 @@ -//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file adds DWARF discriminators to the IR. Path discriminators are -// used to decide what CFG path was taken inside sub-graphs whose instructions -// share the same line and column number information. -// -// The main user of this is the sample profiler. Instruction samples are -// mapped to line number information. Since a single line may be spread -// out over several basic blocks, discriminators add more precise location -// for the samples. 
-// -// For example, -// -// 1 #define ASSERT(P) -// 2 if (!(P)) -// 3 abort() -// ... -// 100 while (true) { -// 101 ASSERT (sum < 0); -// 102 ... -// 130 } -// -// when converted to IR, this snippet looks something like: -// -// while.body: ; preds = %entry, %if.end -// %0 = load i32* %sum, align 4, !dbg !15 -// %cmp = icmp slt i32 %0, 0, !dbg !15 -// br i1 %cmp, label %if.end, label %if.then, !dbg !15 -// -// if.then: ; preds = %while.body -// call void @abort(), !dbg !15 -// br label %if.end, !dbg !15 -// -// Notice that all the instructions in blocks 'while.body' and 'if.then' -// have exactly the same debug information. When this program is sampled -// at runtime, the profiler will assume that all these instructions are -// equally frequent. This, in turn, will consider the edge while.body->if.then -// to be frequently taken (which is incorrect). -// -// By adding a discriminator value to the instructions in block 'if.then', -// we can distinguish instructions at line 101 with discriminator 0 from -// the instructions at line 101 with discriminator 1. -// -// For more details about DWARF discriminators, please visit -// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/AddDiscriminators.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils.h" -#include <utility> - -using namespace llvm; - -#define DEBUG_TYPE "add-discriminators" - -// Command line option to disable discriminator generation even in the -// presence of debug information. This is only needed when debugging -// debug info generation issues. -static cl::opt<bool> NoDiscriminators( - "no-discriminators", cl::init(false), - cl::desc("Disable generation of discriminator information.")); - -namespace { - -// The legacy pass of AddDiscriminators. -struct AddDiscriminatorsLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - - AddDiscriminatorsLegacyPass() : FunctionPass(ID) { - initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; -}; - -} // end anonymous namespace - -char AddDiscriminatorsLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators", - "Add DWARF path discriminators", false, false) -INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators", - "Add DWARF path discriminators", false, false) - -// Create the legacy AddDiscriminatorsPass. -FunctionPass *llvm::createAddDiscriminatorsPass() { - return new AddDiscriminatorsLegacyPass(); -} - -static bool shouldHaveDiscriminator(const Instruction *I) { - return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I); -} - -/// Assign DWARF discriminators. -/// -/// To assign discriminators, we examine the boundaries of every -/// basic block and its successors. Suppose there is a basic block B1 -/// with successor B2. 
The last instruction I1 in B1 and the first
-/// instruction I2 in B2 are located at the same file and line number.
-/// This situation is illustrated in the following code snippet:
-///
-///       if (i < 10) x = i;
-///
-///     entry:
-///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
-///     if.then:
-///       %1 = load i32* %i.addr, align 4, !dbg !10
-///       store i32 %1, i32* %x, align 4, !dbg !10
-///       br label %if.end, !dbg !10
-///     if.end:
-///       ret void, !dbg !12
-///
-/// Notice how the branch instruction in block 'entry' and all the
-/// instructions in block 'if.then' have the exact same debug location
-/// information (!dbg !10).
-///
-/// To distinguish instructions in block 'entry' from instructions in
-/// block 'if.then', we generate a new lexical block for all the
-/// instructions in block 'if.then' that share the same file and line
-/// location with the last instruction of block 'entry'.
-///
-/// This new lexical block will have the same location information as
-/// the previous one, but with a new DWARF discriminator value.
-///
-/// One of the main uses of this discriminator value is in runtime
-/// sample profilers. It allows the profiler to distinguish instructions
-/// at location !dbg !10 that execute on different basic blocks. This is
-/// important because while the predicate 'if (x < 10)' may have been
-/// executed millions of times, the assignment 'x = i' may have only
-/// executed a handful of times (meaning that the entry->if.then edge is
-/// seldom taken).
-///
-/// If we did not have discriminator information, the profiler would
-/// assign the same weight to both blocks 'entry' and 'if.then', which
-/// in turn would make it conclude that the entry->if.then edge is very
-/// hot.
-///
-/// To decide where to create new discriminator values, this function
-/// traverses the CFG and examines instructions at basic block boundaries.
-/// If the last instruction I1 of a block B1 is at the same file and line
-/// location as instruction I2 of successor B2, then it creates a new
-/// lexical block for I2 and all the instructions in B2 that share the same
-/// file and line location as I2. This new lexical block will have a
-/// different discriminator number than I1.
-static bool addDiscriminators(Function &F) {
-  // If the function has debug information, but the user has disabled
-  // discriminators, do nothing.
-  // Similarly, if the function has no debug info, do nothing.
-  if (NoDiscriminators || !F.getSubprogram())
-    return false;
-
-  bool Changed = false;
-
-  using Location = std::pair<StringRef, unsigned>;
-  using BBSet = DenseSet<const BasicBlock *>;
-  using LocationBBMap = DenseMap<Location, BBSet>;
-  using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
-  using LocationSet = DenseSet<Location>;
-
-  LocationBBMap LBM;
-  LocationDiscriminatorMap LDM;
-
-  // Traverse all instructions in the function. If the source line location
-  // of the instruction appears in another basic block, assign a new
-  // discriminator for this instruction.
-  for (BasicBlock &B : F) {
-    for (auto &I : B.getInstList()) {
-      // Not all intrinsic calls should have a discriminator.
-      // We want to avoid a non-deterministic assignment of discriminators at
-      // different debug levels. We still allow discriminators on memory
-      // intrinsic calls because those can be early expanded by SROA into
-      // pairs of loads and stores, and the expanded load/store instructions
-      // should have a valid discriminator.
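// Aside: a standalone sketch of the bookkeeping the loop below performs,
// using toy std::map/std::set types in place of DenseMap/DenseSet and an
// integer block id in place of BasicBlock*. Illustrative only.
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct ToyInstr { std::string File; unsigned Line; int Block; unsigned Disc = 0; };

void assignBaseDiscriminators(std::vector<ToyInstr> &Insts) {
  using Location = std::pair<std::string, unsigned>;
  std::map<Location, std::set<int>> BlocksAtLoc; // blocks seen per location
  std::map<Location, unsigned> LastDisc;         // last discriminator issued
  for (ToyInstr &I : Insts) {
    Location L{I.File, I.Line};
    bool NewBlock = BlocksAtLoc[L].insert(I.Block).second;
    if (BlocksAtLoc[L].size() == 1)
      continue;                 // the location lives in a single block so far
    // A block newly seen at a known location draws a fresh discriminator;
    // otherwise the location's most recent value is reused.
    I.Disc = NewBlock ? ++LastDisc[L] : LastDisc[L];
  }
}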
-      if (!shouldHaveDiscriminator(&I))
-        continue;
-      const DILocation *DIL = I.getDebugLoc();
-      if (!DIL)
-        continue;
-      Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
-      auto &BBMap = LBM[L];
-      auto R = BBMap.insert(&B);
-      if (BBMap.size() == 1)
-        continue;
-      // If more than one block contains instructions with the same line+file,
-      // a discriminator is needed to distinguish their instructions.
-      // Only the lowest 7 bits are used to represent a discriminator to fit
-      // it in a one-byte ULEB128 representation.
-      unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
-      auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
-      if (!NewDIL) {
-        LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
-                          << DIL->getFilename() << ":" << DIL->getLine() << ":"
-                          << DIL->getColumn() << ":" << Discriminator << " "
-                          << I << "\n");
-      } else {
-        I.setDebugLoc(NewDIL.getValue());
-        LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
-                          << DIL->getColumn() << ":" << Discriminator << " " << I
-                          << "\n");
-      }
-      Changed = true;
-    }
-  }
-
-  // Traverse all instructions and assign new discriminators to call
-  // instructions with the same line number that are in the same basic block.
-  // Sample-based profiling needs to distinguish different function calls
-  // within the same source line for correct profile annotation.
-  for (BasicBlock &B : F) {
-    LocationSet CallLocations;
-    for (auto &I : B.getInstList()) {
-      // We bypass intrinsic calls for the following two reasons:
-      //  1) We want to avoid a non-deterministic assignment of
-      //     discriminators.
-      //  2) We want to minimize the number of base discriminators used.
-      if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
-        continue;
-
-      DILocation *CurrentDIL = I.getDebugLoc();
-      if (!CurrentDIL)
-        continue;
-      Location L =
-          std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
-      if (!CallLocations.insert(L).second) {
-        unsigned Discriminator = ++LDM[L];
-        auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
-        if (!NewDIL) {
-          LLVM_DEBUG(dbgs()
-                     << "Could not encode discriminator: "
-                     << CurrentDIL->getFilename() << ":"
-                     << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
-                     << ":" << Discriminator << " " << I << "\n");
-        } else {
-          I.setDebugLoc(NewDIL.getValue());
-          Changed = true;
-        }
-      }
-    }
-  }
-  return Changed;
-}
-
-bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
-  return addDiscriminators(F);
-}
-
-PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
-                                             FunctionAnalysisManager &AM) {
-  if (!addDiscriminators(F))
-    return PreservedAnalyses::all();
-
-  // FIXME: should be all()
-  return PreservedAnalyses::none();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
deleted file mode 100644
index 5fa371377c85..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ /dev/null
@@ -1,934 +0,0 @@
-//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This family of functions performs manipulations on basic blocks, and
-// instructions contained within basic blocks.
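// Aside, looking back at the call-handling loop in AddDiscriminators above:
// two calls in one block that share a source line are told apart by giving
// the second and later ones fresh discriminators. Standalone sketch with toy
// types; the real code clones DILocations instead.
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct ToyCall { std::string File; unsigned Line; unsigned Disc = 0; };

void discriminateCallsInBlock(
    std::vector<ToyCall> &Calls,
    std::map<std::pair<std::string, unsigned>, unsigned> &LastDisc) {
  std::set<std::pair<std::string, unsigned>> Seen; // call locations in block
  for (ToyCall &C : Calls) {
    auto L = std::make_pair(C.File, C.Line);
    if (!Seen.insert(L).second)
      C.Disc = ++LastDisc[L]; // repeat location: draw a new discriminator
  }
}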
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" -#include <cassert> -#include <cstdint> -#include <string> -#include <utility> -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "basicblock-utils" - -void llvm::DetatchDeadBlocks( - ArrayRef<BasicBlock *> BBs, - SmallVectorImpl<DominatorTree::UpdateType> *Updates, - bool KeepOneInputPHIs) { - for (auto *BB : BBs) { - // Loop through all of our successors and make sure they know that one - // of their predecessors is going away. - SmallPtrSet<BasicBlock *, 4> UniqueSuccessors; - for (BasicBlock *Succ : successors(BB)) { - Succ->removePredecessor(BB, KeepOneInputPHIs); - if (Updates && UniqueSuccessors.insert(Succ).second) - Updates->push_back({DominatorTree::Delete, BB, Succ}); - } - - // Zap all the instructions in the block. - while (!BB->empty()) { - Instruction &I = BB->back(); - // If this instruction is used, replace uses with an arbitrary value. - // Because control flow can't get here, we don't care what we replace the - // value with. Note that since this block is unreachable, and all values - // contained within it must dominate their uses, that all uses will - // eventually be removed (they are themselves dead). - if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); - BB->getInstList().pop_back(); - } - new UnreachableInst(BB->getContext(), BB); - assert(BB->getInstList().size() == 1 && - isa<UnreachableInst>(BB->getTerminator()) && - "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); - } -} - -void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU, - bool KeepOneInputPHIs) { - DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs); -} - -void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU, - bool KeepOneInputPHIs) { -#ifndef NDEBUG - // Make sure that all predecessors of each dead block is also dead. - SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end()); - assert(Dead.size() == BBs.size() && "Duplicating blocks?"); - for (auto *BB : Dead) - for (BasicBlock *Pred : predecessors(BB)) - assert(Dead.count(Pred) && "All predecessors must be dead!"); -#endif - - SmallVector<DominatorTree::UpdateType, 4> Updates; - DetatchDeadBlocks(BBs, DTU ? 
&Updates : nullptr, KeepOneInputPHIs); - - if (DTU) - DTU->applyUpdatesPermissive(Updates); - - for (BasicBlock *BB : BBs) - if (DTU) - DTU->deleteBB(BB); - else - BB->eraseFromParent(); -} - -bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU, - bool KeepOneInputPHIs) { - df_iterator_default_set<BasicBlock*> Reachable; - - // Mark all reachable blocks. - for (BasicBlock *BB : depth_first_ext(&F, Reachable)) - (void)BB/* Mark all reachable blocks */; - - // Collect all dead blocks. - std::vector<BasicBlock*> DeadBlocks; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (!Reachable.count(&*I)) { - BasicBlock *BB = &*I; - DeadBlocks.push_back(BB); - } - - // Delete the dead blocks. - DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs); - - return !DeadBlocks.empty(); -} - -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, - MemoryDependenceResults *MemDep) { - if (!isa<PHINode>(BB->begin())) return; - - while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { - if (PN->getIncomingValue(0) != PN) - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - else - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - - if (MemDep) - MemDep->removeInstruction(PN); // Memdep updates AA itself. - - PN->eraseFromParent(); - } -} - -bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { - // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. - SmallVector<WeakTrackingVH, 8> PHIs; - for (PHINode &PN : BB->phis()) - PHIs.push_back(&PN); - - bool Changed = false; - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); - - return Changed; -} - -bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, - LoopInfo *LI, MemorySSAUpdater *MSSAU, - MemoryDependenceResults *MemDep) { - if (BB->hasAddressTaken()) - return false; - - // Can't merge if there are multiple predecessors, or no predecessors. - BasicBlock *PredBB = BB->getUniquePredecessor(); - if (!PredBB) return false; - - // Don't break self-loops. - if (PredBB == BB) return false; - // Don't break unwinding instructions. - if (PredBB->getTerminator()->isExceptionalTerminator()) - return false; - - // Can't merge if there are multiple distinct successors. - if (PredBB->getUniqueSuccessor() != BB) - return false; - - // Can't merge if there is PHI loop. - for (PHINode &PN : BB->phis()) - for (Value *IncValue : PN.incoming_values()) - if (IncValue == &PN) - return false; - - LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into " - << PredBB->getName() << "\n"); - - // Begin by getting rid of unneeded PHIs. - SmallVector<AssertingVH<Value>, 4> IncomingValues; - if (isa<PHINode>(BB->front())) { - for (PHINode &PN : BB->phis()) - if (!isa<PHINode>(PN.getIncomingValue(0)) || - cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB) - IncomingValues.push_back(PN.getIncomingValue(0)); - FoldSingleEntryPHINodes(BB, MemDep); - } - - // DTU update: Collect all the edges that exit BB. - // These dominator edges will be redirected from Pred. - std::vector<DominatorTree::UpdateType> Updates; - if (DTU) { - Updates.reserve(1 + (2 * succ_size(BB))); - // Add insert edges first. Experimentally, for the particular case of two - // blocks that can be merged, with a single successor and single predecessor - // respectively, it is beneficial to have all insert updates first. 
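// Aside: the mark-and-sweep shape of EliminateUnreachableBlocks, a few hunks
// up, on a toy adjacency-list CFG. Assumes block 0 is the function entry;
// toy integer ids, not the LLVM representation.
#include <cstddef>
#include <vector>

// Returns the blocks to delete: everything not reachable from the entry.
std::vector<int> unreachableBlocks(const std::vector<std::vector<int>> &Succs) {
  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Stack{0};           // start the walk at the entry block
  Reachable[0] = true;
  while (!Stack.empty()) {             // iterative depth-first search
    int BB = Stack.back(); Stack.pop_back();
    for (int S : Succs[BB])
      if (!Reachable[S]) { Reachable[S] = true; Stack.push_back(S); }
  }
  std::vector<int> Dead;
  for (int BB = 0; BB != (int)Succs.size(); ++BB)
    if (!Reachable[BB]) Dead.push_back(BB);
  return Dead;
}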
Deleting - // edges first may lead to unreachable blocks, followed by inserting edges - // making the blocks reachable again. Such DT updates lead to high compile - // times. We add inserts before deletes here to reduce compile time. - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - // This successor of BB may already have PredBB as a predecessor. - if (llvm::find(successors(PredBB), *I) == succ_end(PredBB)) - Updates.push_back({DominatorTree::Insert, PredBB, *I}); - for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - Updates.push_back({DominatorTree::Delete, BB, *I}); - Updates.push_back({DominatorTree::Delete, PredBB, BB}); - } - - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin())); - - // Delete the unconditional branch from the predecessor... - PredBB->getInstList().pop_back(); - - // Make all PHI nodes that referred to BB now refer to Pred as their - // source... - BB->replaceAllUsesWith(PredBB); - - // Move all definitions in the successor to the predecessor... - PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); - new UnreachableInst(BB->getContext(), BB); - - // Eliminate duplicate dbg.values describing the entry PHI node post-splice. - for (auto Incoming : IncomingValues) { - if (isa<Instruction>(*Incoming)) { - SmallVector<DbgValueInst *, 2> DbgValues; - SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2> - DbgValueSet; - llvm::findDbgValues(DbgValues, Incoming); - for (auto &DVI : DbgValues) { - auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()}); - if (!R.second) - DVI->eraseFromParent(); - } - } - } - - // Inherit predecessors name if it exists. - if (!PredBB->hasName()) - PredBB->takeName(BB); - - if (LI) - LI->removeBlock(BB); - - if (MemDep) - MemDep->invalidateCachedPredecessors(); - - // Finally, erase the old block and update dominator info. - if (DTU) { - assert(BB->getInstList().size() == 1 && - isa<UnreachableInst>(BB->getTerminator()) && - "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); - DTU->applyUpdatesPermissive(Updates); - DTU->deleteBB(BB); - } - - else { - BB->eraseFromParent(); // Nuke BB if DTU is nullptr. - } - return true; -} - -void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, - BasicBlock::iterator &BI, Value *V) { - Instruction &I = *BI; - // Replaces all of the uses of the instruction with uses of the value - I.replaceAllUsesWith(V); - - // Make sure to propagate a name if there is one already. - if (I.hasName() && !V->hasName()) - V->takeName(&I); - - // Delete the unnecessary instruction now... - BI = BIL.erase(BI); -} - -void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, - BasicBlock::iterator &BI, Instruction *I) { - assert(I->getParent() == nullptr && - "ReplaceInstWithInst: Instruction already inserted into basic block!"); - - // Copy debug location to newly added instruction, if it wasn't already set - // by the caller. - if (!I->getDebugLoc()) - I->setDebugLoc(BI->getDebugLoc()); - - // Insert the new instruction into the basic block... - BasicBlock::iterator New = BIL.insert(BI, I); - - // Replace all uses of the old instruction, and delete it. 
- ReplaceInstWithValue(BIL, BI, I); - - // Move BI back to point to the newly inserted instruction - BI = New; -} - -void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { - BasicBlock::iterator BI(From); - ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); -} - -BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, - LoopInfo *LI, MemorySSAUpdater *MSSAU) { - unsigned SuccNum = GetSuccessorNumber(BB, Succ); - - // If this is a critical edge, let SplitCriticalEdge do it. - Instruction *LatchTerm = BB->getTerminator(); - if (SplitCriticalEdge( - LatchTerm, SuccNum, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA())) - return LatchTerm->getSuccessor(SuccNum); - - // If the edge isn't critical, then BB has a single successor or Succ has a - // single pred. Split the block. - if (BasicBlock *SP = Succ->getSinglePredecessor()) { - // If the successor only has a single pred, split the top of the successor - // block. - assert(SP == BB && "CFG broken"); - SP = nullptr; - return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU); - } - - // Otherwise, if BB has a single successor, split it at the bottom of the - // block. - assert(BB->getTerminator()->getNumSuccessors() == 1 && - "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU); -} - -unsigned -llvm::SplitAllCriticalEdges(Function &F, - const CriticalEdgeSplittingOptions &Options) { - unsigned NumBroken = 0; - for (BasicBlock &BB : F) { - Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (SplitCriticalEdge(TI, i, Options)) - ++NumBroken; - } - return NumBroken; -} - -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU) { - BasicBlock::iterator SplitIt = SplitPt->getIterator(); - while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) - ++SplitIt; - BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); - - // The new block lives in whichever loop the old one did. This preserves - // LCSSA as well, because we force the split point to be after any PHI nodes. - if (LI) - if (Loop *L = LI->getLoopFor(Old)) - L->addBasicBlockToLoop(New, *LI); - - if (DT) - // Old dominates New. New node dominates all other nodes dominated by Old. - if (DomTreeNode *OldNode = DT->getNode(Old)) { - std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); - - DomTreeNode *NewNode = DT->addNewBlock(New, Old); - for (DomTreeNode *I : Children) - DT->changeImmediateDominator(I, NewNode); - } - - // Move MemoryAccesses still tracked in Old, but part of New now. - // Update accesses in successor blocks accordingly. - if (MSSAU) - MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin())); - - return New; -} - -/// Update DominatorTree, LoopInfo, and LCCSA analysis information. -static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, - ArrayRef<BasicBlock *> Preds, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, - bool PreserveLCSSA, bool &HasLoopExit) { - // Update dominator tree if available. - if (DT) { - if (OldBB == DT->getRootNode()->getBlock()) { - assert(NewBB == &NewBB->getParent()->getEntryBlock()); - DT->setNewRoot(NewBB); - } else { - // Split block expects NewBB to have a non-empty set of predecessors. 
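// Aside: the dominator-tree fix-up SplitBlock performs above, in toy form.
// New is inserted under Old, and Old's former children are re-parented to
// New, which is valid because every path leaving Old now runs through New
// past the split point. Toy node type, not llvm::DomTreeNode.
#include <utility>
#include <vector>

struct ToyDomNode {
  ToyDomNode *Parent = nullptr;
  std::vector<ToyDomNode *> Children;
};

void splitDomNode(ToyDomNode &Old, ToyDomNode &New) {
  New.Parent = &Old;
  New.Children = std::move(Old.Children); // former children now sit under New
  for (ToyDomNode *C : New.Children)
    C->Parent = &New;
  Old.Children.assign(1, &New);
}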
- DT->splitBlock(NewBB); - } - } - - // Update MemoryPhis after split if MemorySSA is available - if (MSSAU) - MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds); - - // The rest of the logic is only relevant for updating the loop structures. - if (!LI) - return; - - assert(DT && "DT should be available to update LoopInfo!"); - Loop *L = LI->getLoopFor(OldBB); - - // If we need to preserve loop analyses, collect some information about how - // this split will affect loops. - bool IsLoopEntry = !!L; - bool SplitMakesNewLoopHeader = false; - for (BasicBlock *Pred : Preds) { - // Preds that are not reachable from entry should not be used to identify if - // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks - // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader - // as true and make the NewBB the header of some loop. This breaks LI. - if (!DT->isReachableFromEntry(Pred)) - continue; - // If we need to preserve LCSSA, determine if any of the preds is a loop - // exit. - if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Pred)) - if (!PL->contains(OldBB)) - HasLoopExit = true; - - // If we need to preserve LoopInfo, note whether any of the preds crosses - // an interesting loop boundary. - if (!L) - continue; - if (L->contains(Pred)) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } - - // Unless we have a loop for OldBB, nothing else to do here. - if (!L) - return; - - if (IsLoopEntry) { - // Add the new block to the nearest enclosing loop (and not an adjacent - // loop). To find this, examine each of the predecessors and determine which - // loops enclose them, and select the most-nested loop which contains the - // loop containing the block being split. - Loop *InnermostPredLoop = nullptr; - for (BasicBlock *Pred : Preds) { - if (Loop *PredLoop = LI->getLoopFor(Pred)) { - // Seek a loop which actually contains the block being split (to avoid - // adjacent loops). - while (PredLoop && !PredLoop->contains(OldBB)) - PredLoop = PredLoop->getParentLoop(); - - // Select the most-nested of these loops which contains the block. - if (PredLoop && PredLoop->contains(OldBB) && - (!InnermostPredLoop || - InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) - InnermostPredLoop = PredLoop; - } - } - - if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI); - } else { - L->addBasicBlockToLoop(NewBB, *LI); - if (SplitMakesNewLoopHeader) - L->moveToHeader(NewBB); - } -} - -/// Update the PHI nodes in OrigBB to include the values coming from NewBB. -/// This also updates AliasAnalysis, if available. -static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, - ArrayRef<BasicBlock *> Preds, BranchInst *BI, - bool HasLoopExit) { - // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); - for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { - PHINode *PN = cast<PHINode>(I++); - - // Check to see if all of the values coming in are the same. If so, we - // don't need to create a new PHI node, unless it's needed for LCSSA. 
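// Aside: the "nearest enclosing loop" walk above, with a toy loop type in
// place of llvm::Loop (here a loop directly stores every block id it
// contains, including those of its subloops). Illustrative only.
#include <set>

struct ToyLoop {
  ToyLoop *Parent = nullptr;
  unsigned Depth = 0;                            // 1 = outermost
  std::set<int> Blocks;
  bool contains(int BB) const { return Blocks.count(BB) != 0; }
};

// Hoist PredLoop upward until it contains OldBB, then keep the deepest such
// loop seen so far across all predecessors of the block being split.
ToyLoop *innermostEnclosing(ToyLoop *PredLoop, int OldBB, ToyLoop *Best) {
  while (PredLoop && !PredLoop->contains(OldBB))
    PredLoop = PredLoop->Parent;
  if (PredLoop && (!Best || Best->Depth < PredLoop->Depth))
    return PredLoop;
  return Best;
}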
- Value *InVal = nullptr; - if (!HasLoopExit) { - InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (!PredSet.count(PN->getIncomingBlock(i))) - continue; - if (!InVal) - InVal = PN->getIncomingValue(i); - else if (InVal != PN->getIncomingValue(i)) { - InVal = nullptr; - break; - } - } - } - - if (InVal) { - // If all incoming values for the new PHI would be the same, just don't - // make a new PHI. Instead, just remove the incoming values from the old - // PHI. - - // NOTE! This loop walks backwards for a reason! First off, this minimizes - // the cost of removal if we end up removing a large number of values, and - // second off, this ensures that the indices for the incoming values - // aren't invalidated when we remove one. - for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) - if (PredSet.count(PN->getIncomingBlock(i))) - PN->removeIncomingValue(i, false); - - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); - continue; - } - - // If the values coming into the block are not the same, we need a new - // PHI. - // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - - // NOTE! This loop walks backwards for a reason! First off, this minimizes - // the cost of removal if we end up removing a large number of values, and - // second off, this ensures that the indices for the incoming values aren't - // invalidated when we remove one. - for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { - BasicBlock *IncomingBB = PN->getIncomingBlock(i); - if (PredSet.count(IncomingBB)) { - Value *V = PN->removeIncomingValue(i, false); - NewPHI->addIncoming(V, IncomingBB); - } - } - - PN->addIncoming(NewPHI, NewBB); - } -} - -BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - ArrayRef<BasicBlock *> Preds, - const char *Suffix, DominatorTree *DT, - LoopInfo *LI, MemorySSAUpdater *MSSAU, - bool PreserveLCSSA) { - // Do not attempt to split that which cannot be split. - if (!BB->canSplitPredecessors()) - return nullptr; - - // For the landingpads we need to act a bit differently. - // Delegate this work to the SplitLandingPadPredecessors. - if (BB->isLandingPad()) { - SmallVector<BasicBlock*, 2> NewBBs; - std::string NewName = std::string(Suffix) + ".split-lp"; - - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, - LI, MSSAU, PreserveLCSSA); - return NewBBs[0]; - } - - // Create new basic block, insert right before the original block. - BasicBlock *NewBB = BasicBlock::Create( - BB->getContext(), BB->getName() + Suffix, BB->getParent(), BB); - - // The new block unconditionally branches to the old block. - BranchInst *BI = BranchInst::Create(BB, NewBB); - // Splitting the predecessors of a loop header creates a preheader block. - if (LI && LI->isLoopHeader(BB)) - // Using the loop start line number prevents debuggers stepping into the - // loop body for this instruction. - BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); - else - BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); - - // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // This is slightly more strict than necessary; the minimum requirement - // is that there be no more than one indirectbr branching to BB. And - // all BlockAddress uses would need to be updated. 
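// Aside: the PHI rewiring performed by UpdatePHINodes above, in miniature,
// for the case where every moved predecessor carried the same incoming
// value. Toy (pred, value) pairs stand in for PHINode.
#include <algorithm>
#include <set>
#include <utility>
#include <vector>

using PhiEntries = std::vector<std::pair<int, int>>; // (pred block, value)

void movePredsBehindNewBlock(PhiEntries &PN, const std::set<int> &Preds,
                             int NewBB, int CommonVal) {
  // Drop the entries for predecessors that now reach us through NewBB...
  PN.erase(std::remove_if(PN.begin(), PN.end(),
                          [&](const std::pair<int, int> &In) {
                            return Preds.count(In.first) != 0;
                          }),
           PN.end());
  // ...and route their shared value through the single new edge.
  PN.emplace_back(NewBB, CommonVal);
}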
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); - assert(!isa<CallBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from a CallBrInst"); - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - } - - // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI - // node becomes an incoming value for BB's phi node. However, if the Preds - // list is empty, we need to insert dummy entries into the PHI nodes in BB to - // account for the newly created predecessor. - if (Preds.empty()) { - // Insert dummy values as the incoming value. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); - } - - // Update DominatorTree, LoopInfo, and LCCSA analysis information. - bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA, - HasLoopExit); - - if (!Preds.empty()) { - // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); - } - - return NewBB; -} - -void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, - ArrayRef<BasicBlock *> Preds, - const char *Suffix1, const char *Suffix2, - SmallVectorImpl<BasicBlock *> &NewBBs, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU, - bool PreserveLCSSA) { - assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); - - // Create a new basic block for OrigBB's predecessors listed in Preds. Insert - // it right before the original block. - BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), - OrigBB->getName() + Suffix1, - OrigBB->getParent(), OrigBB); - NewBBs.push_back(NewBB1); - - // The new block unconditionally branches to the old block. - BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); - BI1->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); - - // Move the edges from Preds to point to NewBB1 instead of OrigBB. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // This is slightly more strict than necessary; the minimum requirement - // is that there be no more than one indirectbr branching to BB. And - // all BlockAddress uses would need to be updated. - assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); - Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); - } - - bool HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA, - HasLoopExit); - - // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); - - // Move the remaining edges from OrigBB to point to NewBB2. - SmallVector<BasicBlock*, 8> NewBB2Preds; - for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); - i != e; ) { - BasicBlock *Pred = *i++; - if (Pred == NewBB1) continue; - assert(!isa<IndirectBrInst>(Pred->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); - NewBB2Preds.push_back(Pred); - e = pred_end(OrigBB); - } - - BasicBlock *NewBB2 = nullptr; - if (!NewBB2Preds.empty()) { - // Create another basic block for the rest of OrigBB's predecessors. - NewBB2 = BasicBlock::Create(OrigBB->getContext(), - OrigBB->getName() + Suffix2, - OrigBB->getParent(), OrigBB); - NewBBs.push_back(NewBB2); - - // The new block unconditionally branches to the old block. 
- BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); - BI2->setDebugLoc(OrigBB->getFirstNonPHI()->getDebugLoc()); - - // Move the remaining edges from OrigBB to point to NewBB2. - for (BasicBlock *NewBB2Pred : NewBB2Preds) - NewBB2Pred->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); - - // Update DominatorTree, LoopInfo, and LCCSA analysis information. - HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU, - PreserveLCSSA, HasLoopExit); - - // Update the PHI nodes in OrigBB with the values coming from NewBB2. - UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); - } - - LandingPadInst *LPad = OrigBB->getLandingPadInst(); - Instruction *Clone1 = LPad->clone(); - Clone1->setName(Twine("lpad") + Suffix1); - NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); - - if (NewBB2) { - Instruction *Clone2 = LPad->clone(); - Clone2->setName(Twine("lpad") + Suffix2); - NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); - - // Create a PHI node for the two cloned landingpad instructions only - // if the original landingpad instruction has some uses. - if (!LPad->use_empty()) { - assert(!LPad->getType()->isTokenTy() && - "Split cannot be applied if LPad is token type. Otherwise an " - "invalid PHINode of token type would be created."); - PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); - PN->addIncoming(Clone1, NewBB1); - PN->addIncoming(Clone2, NewBB2); - LPad->replaceAllUsesWith(PN); - } - LPad->eraseFromParent(); - } else { - // There is no second clone. Just replace the landing pad with the first - // clone. - LPad->replaceAllUsesWith(Clone1); - LPad->eraseFromParent(); - } -} - -ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, - BasicBlock *Pred, - DomTreeUpdater *DTU) { - Instruction *UncondBranch = Pred->getTerminator(); - // Clone the return and add it to the end of the predecessor. - Instruction *NewRet = RI->clone(); - Pred->getInstList().push_back(NewRet); - - // If the return instruction returns a value, and if the value was a - // PHI node in "BB", propagate the right value into the return. - for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) { - Value *V = *i; - Instruction *NewBC = nullptr; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { - // Return value might be bitcasted. Clone and insert it before the - // return instruction. - V = BCI->getOperand(0); - NewBC = BCI->clone(); - Pred->getInstList().insert(NewRet->getIterator(), NewBC); - *i = NewBC; - } - if (PHINode *PN = dyn_cast<PHINode>(V)) { - if (PN->getParent() == BB) { - if (NewBC) - NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); - else - *i = PN->getIncomingValueForBlock(Pred); - } - } - } - - // Update any PHI nodes in the returning block to realize that we no - // longer branch to them. 
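// Aside: the value-propagation step a few lines up, in miniature. When the
// cloned return's operand was a PHI in BB, the copy that now lives in Pred
// must read that PHI's incoming value for Pred. Toy types, integer ids.
#include <map>

struct FoldPHI { std::map<int, int> IncomingByPred; }; // pred block -> value

int valueForFoldedReturn(const FoldPHI &PN, int Pred) {
  return PN.IncomingByPred.at(Pred); // the value BB would have seen from Pred
}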
- BB->removePredecessor(Pred); - UncondBranch->eraseFromParent(); - - if (DTU) - DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}}); - - return cast<ReturnInst>(NewRet); -} - -Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond, - Instruction *SplitBefore, - bool Unreachable, - MDNode *BranchWeights, - DominatorTree *DT, LoopInfo *LI, - BasicBlock *ThenBlock) { - BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); - Instruction *HeadOldTerm = Head->getTerminator(); - LLVMContext &C = Head->getContext(); - Instruction *CheckTerm; - bool CreateThenBlock = (ThenBlock == nullptr); - if (CreateThenBlock) { - ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); - if (Unreachable) - CheckTerm = new UnreachableInst(C, ThenBlock); - else - CheckTerm = BranchInst::Create(Tail, ThenBlock); - CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); - } else - CheckTerm = ThenBlock->getTerminator(); - BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); - HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); - ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - - if (DT) { - if (DomTreeNode *OldNode = DT->getNode(Head)) { - std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); - - DomTreeNode *NewNode = DT->addNewBlock(Tail, Head); - for (DomTreeNode *Child : Children) - DT->changeImmediateDominator(Child, NewNode); - - // Head dominates ThenBlock. - if (CreateThenBlock) - DT->addNewBlock(ThenBlock, Head); - else - DT->changeImmediateDominator(ThenBlock, Head); - } - } - - if (LI) { - if (Loop *L = LI->getLoopFor(Head)) { - L->addBasicBlockToLoop(ThenBlock, *LI); - L->addBasicBlockToLoop(Tail, *LI); - } - } - - return CheckTerm; -} - -void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, - Instruction **ThenTerm, - Instruction **ElseTerm, - MDNode *BranchWeights) { - BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); - Instruction *HeadOldTerm = Head->getTerminator(); - LLVMContext &C = Head->getContext(); - BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); - BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); - *ThenTerm = BranchInst::Create(Tail, ThenBlock); - (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc()); - *ElseTerm = BranchInst::Create(Tail, ElseBlock); - (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc()); - BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); - HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); - ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); -} - -Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, - BasicBlock *&IfFalse) { - PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); - BasicBlock *Pred1 = nullptr; - BasicBlock *Pred2 = nullptr; - - if (SomePHI) { - if (SomePHI->getNumIncomingValues() != 2) - return nullptr; - Pred1 = SomePHI->getIncomingBlock(0); - Pred2 = SomePHI->getIncomingBlock(1); - } else { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (PI == PE) // No predecessor - return nullptr; - Pred1 = *PI++; - if (PI == PE) // Only one predecessor - return nullptr; - Pred2 = *PI++; - if (PI != PE) // More than two predecessors - return nullptr; - } - - // We can only handle branches. Other control flow will be lowered to - // branches if possible anyway. 
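// Aside: the two shapes this routine recognizes, as a toy classifier. In a
// "triangle" one predecessor branches conditionally both to BB and to the
// other predecessor; in a "diamond" both predecessors branch unconditionally
// and share a single conditional predecessor. Toy block type, not LLVM's.
struct ShapeBB {
  ShapeBB *TrueSucc = nullptr, *FalseSucc = nullptr; // set only when the
                                                     // terminator is conditional
  ShapeBB *SinglePred = nullptr;                     // null if 0 or >1 preds
};

bool isTriangle(const ShapeBB &P1, const ShapeBB *P2, const ShapeBB *BB) {
  return P1.TrueSucc && ((P1.TrueSucc == BB && P1.FalseSucc == P2) ||
                         (P1.TrueSucc == P2 && P1.FalseSucc == BB));
}

bool isDiamond(const ShapeBB &P1, const ShapeBB &P2) {
  const ShapeBB *Common = P1.SinglePred;
  return Common && Common == P2.SinglePred && Common->TrueSucc != nullptr;
}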
- BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (!Pred1Br || !Pred2Br) - return nullptr; - - // Eliminate code duplication by ensuring that Pred1Br is conditional if - // either are. - if (Pred2Br->isConditional()) { - // If both branches are conditional, we don't have an "if statement". In - // reality, we could transform this case, but since the condition will be - // required anyway, we stand no chance of eliminating it, so the xform is - // probably not profitable. - if (Pred1Br->isConditional()) - return nullptr; - - std::swap(Pred1, Pred2); - std::swap(Pred1Br, Pred2Br); - } - - if (Pred1Br->isConditional()) { - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (!Pred2->getSinglePredecessor()) - return nullptr; - - // If we found a conditional branch predecessor, make sure that it branches - // to BB and Pred2Br. If it doesn't, this isn't an "if statement". - if (Pred1Br->getSuccessor(0) == BB && - Pred1Br->getSuccessor(1) == Pred2) { - IfTrue = Pred1; - IfFalse = Pred2; - } else if (Pred1Br->getSuccessor(0) == Pred2 && - Pred1Br->getSuccessor(1) == BB) { - IfTrue = Pred2; - IfFalse = Pred1; - } else { - // We know that one arm of the conditional goes to BB, so the other must - // go somewhere unrelated, and this must not be an "if statement". - return nullptr; - } - - return Pred1Br->getCondition(); - } - - // Ok, if we got here, both predecessors end with an unconditional branch to - // BB. Don't panic! If both blocks only have a single (identical) - // predecessor, and THAT is a conditional branch, then we're all ok! - BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) - return nullptr; - - // Otherwise, if this is a conditional branch, then we can use it! - BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (!BI) return nullptr; - - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp deleted file mode 100644 index f5e4b53f6d97..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ /dev/null @@ -1,473 +0,0 @@ -//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// BreakCriticalEdges pass - Break all of the critical edges in the CFG by -// inserting a dummy basic block. This pass may be "required" by passes that -// cannot deal with critical edges. For this usage, the structure type is -// forward declared. This pass obviously invalidates the CFG, but can update -// dominator trees. 
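// Aside: what "critical" means here, computed on a toy adjacency-list CFG
// (indices are block ids; illustrative only). An edge is critical when its
// source has several successors and its destination several predecessors,
// so neither endpoint can host code meant for that edge alone.
#include <vector>

unsigned countCriticalEdges(const std::vector<std::vector<int>> &Succs) {
  std::vector<unsigned> PredCount(Succs.size(), 0);
  for (const auto &S : Succs)
    for (int T : S)
      ++PredCount[T];
  unsigned N = 0;
  for (const auto &S : Succs)
    if (S.size() > 1)                    // source has several successors
      for (int T : S)
        if (PredCount[T] > 1)            // destination has several preds
          ++N;
  return N;                              // edges the pass would split
}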
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/BreakCriticalEdges.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -using namespace llvm; - -#define DEBUG_TYPE "break-crit-edges" - -STATISTIC(NumBroken, "Number of blocks inserted"); - -namespace { - struct BreakCriticalEdges : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(ID) { - initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - - auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); - auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; - - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - unsigned N = - SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT)); - NumBroken += N; - return N > 0; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - - // No loop canonicalization guarantees are broken by this pass. - AU.addPreservedID(LoopSimplifyID); - } - }; -} - -char BreakCriticalEdges::ID = 0; -INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", - "Break critical edges in CFG", false, false) - -// Publicly exposed interface to pass... -char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; -FunctionPass *llvm::createBreakCriticalEdgesPass() { - return new BreakCriticalEdges(); -} - -PreservedAnalyses BreakCriticalEdgesPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); - auto *LI = AM.getCachedResult<LoopAnalysis>(F); - unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); - NumBroken += N; - if (N == 0) - return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - return PA; -} - -//===----------------------------------------------------------------------===// -// Implementation of the external critical edge manipulation functions -//===----------------------------------------------------------------------===// - -/// When a loop exit edge is split, LCSSA form may require new PHIs in the new -/// exit block. This function inserts the new PHIs, as needed. Preds is a list -/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is -/// the old loop exit, now the successor of SplitBB. -static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, - BasicBlock *SplitBB, - BasicBlock *DestBB) { - // SplitBB shouldn't have anything non-trivial in it yet. 
- assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() || - SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!"); - - // For each PHI in the destination block. - for (PHINode &PN : DestBB->phis()) { - unsigned Idx = PN.getBasicBlockIndex(SplitBB); - Value *V = PN.getIncomingValue(Idx); - - // If the input is a PHI which already satisfies LCSSA, don't create - // a new one. - if (const PHINode *VP = dyn_cast<PHINode>(V)) - if (VP->getParent() == SplitBB) - continue; - - // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = PHINode::Create( - PN.getType(), Preds.size(), "split", - SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator()); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) - NewPN->addIncoming(V, Preds[i]); - - // Update the original PHI. - PN.setIncomingValue(Idx, NewPN); - } -} - -BasicBlock * -llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum, - const CriticalEdgeSplittingOptions &Options) { - if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) - return nullptr; - - assert(!isa<IndirectBrInst>(TI) && - "Cannot split critical edge from IndirectBrInst"); - - BasicBlock *TIBB = TI->getParent(); - BasicBlock *DestBB = TI->getSuccessor(SuccNum); - - // Splitting the critical edge to a pad block is non-trivial. Don't do - // it in this generic function. - if (DestBB->isEHPad()) return nullptr; - - // Don't split the non-fallthrough edge from a callbr. - if (isa<CallBrInst>(TI) && SuccNum > 0) - return nullptr; - - if (Options.IgnoreUnreachableDests && - isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime())) - return nullptr; - - // Create a new basic block, linking it into the CFG. - BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), - TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); - // Create our unconditional branch. - BranchInst *NewBI = BranchInst::Create(DestBB, NewBB); - NewBI->setDebugLoc(TI->getDebugLoc()); - - // Branch to the new block, breaking the edge. - TI->setSuccessor(SuccNum, NewBB); - - // Insert the block into the function... right after the block TI lives in. - Function &F = *TIBB->getParent(); - Function::iterator FBBI = TIBB->getIterator(); - F.getBasicBlockList().insert(++FBBI, NewBB); - - // If there are any PHI nodes in DestBB, we need to update them so that they - // merge incoming values from NewBB instead of from TIBB. - { - unsigned BBIdx = 0; - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - // We no longer enter through TIBB, now we come in through NewBB. - // Revector exactly one entry in the PHI node that used to come from - // TIBB to come from NewBB. - PHINode *PN = cast<PHINode>(I); - - // Reuse the previous value of BBIdx if it lines up. In cases where we - // have multiple phi nodes with *lots* of predecessors, this is a speed - // win because we don't have to scan the PHI looking for TIBB. This - // happens because the BB list of PHI nodes are usually in the same - // order. - if (PN->getIncomingBlock(BBIdx) != TIBB) - BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); - } - } - - // If there are any other edges from TIBB to DestBB, update those to go - // through the split block, making those edges non-critical as well (and - // reducing the number of phi entries in the DestBB if relevant). 
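// Aside: the BBIdx caching trick above, in miniature. PHIs in one block
// usually list their predecessors in the same order, so the index that
// matched in the previous PHI is tried first before rescanning. Toy
// representation; precondition: every PHI has exactly one entry for OldPred.
#include <cstddef>
#include <vector>

struct MiniPHI { std::vector<int> IncomingBlocks; };

void revectorPHIs(std::vector<MiniPHI> &PHIs, int OldPred, int NewPred) {
  std::size_t Idx = 0;
  for (MiniPHI &PN : PHIs) {
    if (Idx >= PN.IncomingBlocks.size() || PN.IncomingBlocks[Idx] != OldPred)
      for (Idx = 0; PN.IncomingBlocks[Idx] != OldPred; ++Idx)
        ;                                // cache miss: linear rescan
    PN.IncomingBlocks[Idx] = NewPred;    // revector exactly this entry
  }
}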
- if (Options.MergeIdenticalEdges) {
- for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
- if (TI->getSuccessor(i) != DestBB) continue;
-
- // Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
-
- // We found another edge to DestBB, go to NewBB instead.
- TI->setSuccessor(i, NewBB);
- }
- }
-
- // If we have nothing to update, just return.
- auto *DT = Options.DT;
- auto *PDT = Options.PDT;
- auto *LI = Options.LI;
- auto *MSSAU = Options.MSSAU;
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
- DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
-
- if (!DT && !PDT && !LI)
- return NewBB;
-
- if (DT || PDT) {
- // Update the DominatorTree.
- //      ---> NewBB -----\
- //     /                 V
- //  TIBB -------\\------> DestBB
- //
- // First, inform the DT about the new path from TIBB to DestBB via NewBB,
- // then delete the old edge from TIBB to DestBB. By doing this in that order
- // DestBB stays reachable in the DT the whole time and its subtree doesn't
- // get disconnected.
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
- Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
- if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
- Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
-
- if (DT)
- DT->applyUpdates(Updates);
- if (PDT)
- PDT->applyUpdates(Updates);
- }
-
- // Update LoopInfo if it is around.
- if (LI) {
- if (Loop *TIL = LI->getLoopFor(TIBB)) {
- // If one or the other blocks were not in a loop, the new block is not
- // either, and thus LI doesn't need to be updated.
- if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
- if (TIL == DestLoop) {
- // Both in the same loop, so NewBB joins the loop.
- DestLoop->addBasicBlockToLoop(NewBB, *LI);
- } else if (TIL->contains(DestLoop)) {
- // Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NewBB, *LI);
- } else if (DestLoop->contains(TIL)) {
- // Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NewBB, *LI);
- } else {
- // Edge from two loops with no containment relation. Because these
- // are natural loops, we know that the destination block must be the
- // header of its loop (adding a branch into a loop elsewhere would
- // create an irreducible loop).
- assert(DestLoop->getHeader() == DestBB &&
- "Should not create irreducible loops!");
- if (Loop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NewBB, *LI);
- }
- }
-
- // If TIBB is in a loop and DestBB is outside of that loop, we may need
- // to update LoopSimplify form and LCSSA form.
- if (!TIL->contains(DestBB)) {
- assert(!TIL->contains(NewBB) &&
- "Split point for loop exit is contained in loop!");
-
- // Update LCSSA form in the newly created exit block.
- if (Options.PreserveLCSSA) {
- createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
- }
-
- // The only way that we can break LoopSimplify form by splitting a
- // critical edge is if after the split there exists some edge from TIL to
- // DestBB *and* the only edge into DestBB from outside of TIL is that of
- // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
- // is the new exit block and it has no non-loop predecessors. If the
- // second isn't true, then DestBB was not in LoopSimplify form prior to
- // the split as it had a non-loop predecessor.
- // In both of these cases, the predecessor must be directly in TIL, not in
- // a subloop, or again LoopSimplify doesn't hold.
- SmallVector<BasicBlock *, 4> LoopPreds;
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
- ++I) {
- BasicBlock *P = *I;
- if (P == NewBB)
- continue; // The new block is known.
- if (LI->getLoopFor(P) != TIL) {
- // No need to re-simplify, it wasn't to start with.
- LoopPreds.clear();
- break;
- }
- LoopPreds.push_back(P);
- }
- if (!LoopPreds.empty()) {
- assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
- BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
- if (Options.PreserveLCSSA)
- createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
- }
- }
- }
- }
-
- return NewBB;
-}
-
-// Return the unique indirectbr predecessor of a block. This may return null
-// even if such a predecessor exists, if it's not useful for splitting.
-// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
-// predecessors of BB.
-static BasicBlock *
-findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
- // If the block doesn't have any PHIs, we don't care about it, since there's
- // no point in splitting it.
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
- if (!PN)
- return nullptr;
-
- // Verify we have exactly one IBR predecessor.
- // Conservatively bail out if one of the other predecessors is not a "regular"
- // terminator (that is, not a switch or a br).
- BasicBlock *IBB = nullptr;
- for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
- BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- Instruction *PredTerm = PredBB->getTerminator();
- switch (PredTerm->getOpcode()) {
- case Instruction::IndirectBr:
- if (IBB)
- return nullptr;
- IBB = PredBB;
- break;
- case Instruction::Br:
- case Instruction::Switch:
- OtherPreds.push_back(PredBB);
- continue;
- default:
- return nullptr;
- }
- }
-
- return IBB;
-}
-
-bool llvm::SplitIndirectBrCriticalEdges(Function &F,
- BranchProbabilityInfo *BPI,
- BlockFrequencyInfo *BFI) {
- // Check whether the function has any indirectbrs, and collect which blocks
- // they may jump to. Since most functions don't have indirect branches,
- // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
- SmallSetVector<BasicBlock *, 16> Targets;
- for (auto &BB : F) {
- auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
- if (!IBI)
- continue;
-
- for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
- Targets.insert(IBI->getSuccessor(Succ));
- }
-
- if (Targets.empty())
- return false;
-
- bool ShouldUpdateAnalysis = BPI && BFI;
- bool Changed = false;
- for (BasicBlock *Target : Targets) {
- SmallVector<BasicBlock *, 16> OtherPreds;
- BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
- // If we did not find an indirectbr, or the indirectbr is the only
- // incoming edge, this isn't the kind of edge we're looking for.
- if (!IBRPred || OtherPreds.empty())
- continue;
-
- // Don't even think about ehpads/landingpads.
- Instruction *FirstNonPHI = Target->getFirstNonPHI();
- if (FirstNonPHI->isEHPad() || Target->isLandingPad())
- continue;
-
- BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
- if (ShouldUpdateAnalysis) {
- // Copy the BFI/BPI from Target to BodyBlock.
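- // Illustrative note (not in the original source): after the split,
- // BodyBlock inherits Target's successors, so Target's old outgoing edge
- // probabilities and block frequency carry over to BodyBlock unchanged;
- // Target's own frequency is adjusted further below.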
- for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
- I < E; ++I)
- BPI->setEdgeProbability(BodyBlock, I,
- BPI->getEdgeProbability(Target, I));
- BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
- }
- // It's possible Target was its own successor through an indirectbr.
- // In this case, the indirectbr now comes from BodyBlock.
- if (IBRPred == Target)
- IBRPred = BodyBlock;
-
- // At this point Target only has PHIs, and BodyBlock has the rest of the
- // block's body. Create a copy of Target that will be used by the "direct"
- // preds.
- ValueToValueMapTy VMap;
- BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
-
- BlockFrequency BlockFreqForDirectSucc;
- for (BasicBlock *Pred : OtherPreds) {
- // If the target loops back to itself, then the terminator of the split
- // block (BodyBlock) needs to be updated.
- BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
- Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
- if (ShouldUpdateAnalysis)
- BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
- BPI->getEdgeProbability(Src, DirectSucc);
- }
- if (ShouldUpdateAnalysis) {
- BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
- BlockFrequency NewBlockFreqForTarget =
- BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
- BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
- BPI->eraseBlock(Target);
- }
-
- // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
- // they are clones, so the number of PHIs is the same.
- // (a) Remove the edge coming from IBRPred from the "Direct" PHI
- // (b) Leave that as the only edge in the "Indirect" PHI.
- // (c) Merge the two in the body block.
- BasicBlock::iterator Indirect = Target->begin(),
- End = Target->getFirstNonPHI()->getIterator();
- BasicBlock::iterator Direct = DirectSucc->begin();
- BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
-
- assert(&*End == Target->getTerminator() &&
- "Block was expected to only contain PHIs");
-
- while (Indirect != End) {
- PHINode *DirPHI = cast<PHINode>(Direct);
- PHINode *IndPHI = cast<PHINode>(Indirect);
-
- // Now, clean up - the direct block shouldn't get the indirect value,
- // and vice versa.
- DirPHI->removeIncomingValue(IBRPred);
- Direct++;
-
- // Advance the pointer here, to avoid invalidation issues when the old
- // PHI is erased.
- Indirect++;
-
- PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
- NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
- IBRPred);
-
- // Create a PHI in the body block, to merge the direct and indirect
- // predecessors.
- PHINode *MergePHI =
- PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
- MergePHI->addIncoming(NewIndPHI, Target);
- MergePHI->addIncoming(DirPHI, DirectSucc);
-
- IndPHI->replaceAllUsesWith(MergePHI);
- IndPHI->eraseFromParent();
- }
-
- Changed = true;
- }
-
- return Changed;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
deleted file mode 100644
index 27f110e24f9c..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ /dev/null
@@ -1,1340 +0,0 @@
-//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements some functions that will create standard C libcalls. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Analysis/MemoryBuiltins.h" - -using namespace llvm; - -#define DEBUG_TYPE "build-libcalls" - -//- Infer Attributes ---------------------------------------------------------// - -STATISTIC(NumReadNone, "Number of functions inferred as readnone"); -STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); -STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); -STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); -STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); -STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); -STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); -STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); -STATISTIC(NumReturnedArg, "Number of arguments inferred as returned"); - -static bool setDoesNotAccessMemory(Function &F) { - if (F.doesNotAccessMemory()) - return false; - F.setDoesNotAccessMemory(); - ++NumReadNone; - return true; -} - -static bool setOnlyReadsMemory(Function &F) { - if (F.onlyReadsMemory()) - return false; - F.setOnlyReadsMemory(); - ++NumReadOnly; - return true; -} - -static bool setOnlyAccessesArgMemory(Function &F) { - if (F.onlyAccessesArgMemory()) - return false; - F.setOnlyAccessesArgMemory(); - ++NumArgMemOnly; - return true; -} - -static bool setDoesNotThrow(Function &F) { - if (F.doesNotThrow()) - return false; - F.setDoesNotThrow(); - ++NumNoUnwind; - return true; -} - -static bool setRetDoesNotAlias(Function &F) { - if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias)) - return false; - F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); - ++NumNoAlias; - return true; -} - -static bool setDoesNotCapture(Function &F, unsigned ArgNo) { - if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) - return false; - F.addParamAttr(ArgNo, Attribute::NoCapture); - ++NumNoCapture; - return true; -} - -static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { - if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) - return false; - F.addParamAttr(ArgNo, Attribute::ReadOnly); - ++NumReadOnlyArg; - return true; -} - -static bool setRetNonNull(Function &F) { - assert(F.getReturnType()->isPointerTy() && - "nonnull applies only to pointers"); - if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull)) - return false; - F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); - ++NumNonNull; - return true; -} - -static bool setReturnedArg(Function &F, unsigned ArgNo) { - if (F.hasParamAttribute(ArgNo, Attribute::Returned)) - return false; - F.addParamAttr(ArgNo, Attribute::Returned); - ++NumReturnedArg; - return true; -} - -static bool setNonLazyBind(Function &F) { - if (F.hasFnAttribute(Attribute::NonLazyBind)) - return false; - 
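-  // Illustrative note (not in the original source): nonlazybind directs the
-  // dynamic linker to resolve this function eagerly instead of through lazy
-  // PLT binding; inferLibFuncAttributes requests it below only for modules
-  // built to reach runtime libcalls via the GOT (RtLibUseGOT).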
F.addFnAttr(Attribute::NonLazyBind); - return true; -} - -static bool setDoesNotFreeMemory(Function &F) { - if (F.hasFnAttribute(Attribute::NoFree)) - return false; - F.addFnAttr(Attribute::NoFree); - return true; -} - -bool llvm::inferLibFuncAttributes(Module *M, StringRef Name, - const TargetLibraryInfo &TLI) { - Function *F = M->getFunction(Name); - if (!F) - return false; - return inferLibFuncAttributes(*F, TLI); -} - -bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { - LibFunc TheLibFunc; - if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) - return false; - - bool Changed = false; - - if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) - Changed |= setDoesNotFreeMemory(F); - - if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) - Changed |= setNonLazyBind(F); - - switch (TheLibFunc) { - case LibFunc_strlen: - case LibFunc_wcslen: - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setOnlyAccessesArgMemory(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_strchr: - case LibFunc_strrchr: - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc_strtol: - case LibFunc_strtod: - case LibFunc_strtof: - case LibFunc_strtoul: - case LibFunc_strtoll: - case LibFunc_strtold: - case LibFunc_strtoull: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_strcpy: - case LibFunc_strncpy: - case LibFunc_strcat: - case LibFunc_strncat: - Changed |= setReturnedArg(F, 0); - LLVM_FALLTHROUGH; - case LibFunc_stpcpy: - case LibFunc_stpncpy: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_strxfrm: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_strcmp: // 0,1 - case LibFunc_strspn: // 0,1 - case LibFunc_strncmp: // 0,1 - case LibFunc_strcspn: // 0,1 - case LibFunc_strcoll: // 0,1 - case LibFunc_strcasecmp: // 0,1 - case LibFunc_strncasecmp: // - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_strstr: - case LibFunc_strpbrk: - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_strtok: - case LibFunc_strtok_r: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_scanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_setbuf: - case LibFunc_setvbuf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_strdup: - case LibFunc_strndup: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_stat: - case LibFunc_statvfs: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_sscanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 
0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_sprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_snprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc_setitimer: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_system: - // May throw; "system" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_malloc: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_memcmp: - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_memchr: - case LibFunc_memrchr: - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc_modf: - case LibFunc_modff: - case LibFunc_modfl: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_memcpy: - case LibFunc_memmove: - Changed |= setReturnedArg(F, 0); - LLVM_FALLTHROUGH; - case LibFunc_mempcpy: - case LibFunc_memccpy: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_memcpy_chk: - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc_memalign: - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_mkdir: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_mktime: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_realloc: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_read: - // May throw; "read" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_rewind: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_rmdir: - case LibFunc_remove: - case LibFunc_realpath: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_rename: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_readlink: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_write: - // May throw; "write" is a valid pthread cancellation point. 
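-    // Illustrative note (not in the original source): cancellation points may
-    // unwind on some C libraries (e.g. glibc implements pthread_cancel via
-    // forced unwinding), so nounwind is deliberately not inferred here; only
-    // the argument attributes below are safe.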
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_bcopy: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_bcmp: - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_bzero: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_calloc: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_chmod: - case LibFunc_chown: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_ctermid: - case LibFunc_clearerr: - case LibFunc_closedir: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_atoi: - case LibFunc_atol: - case LibFunc_atof: - case LibFunc_atoll: - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_access: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_fopen: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_fdopen: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_feof: - case LibFunc_free: - case LibFunc_fseek: - case LibFunc_ftell: - case LibFunc_fgetc: - case LibFunc_fgetc_unlocked: - case LibFunc_fseeko: - case LibFunc_ftello: - case LibFunc_fileno: - case LibFunc_fflush: - case LibFunc_fclose: - case LibFunc_fsetpos: - case LibFunc_flockfile: - case LibFunc_funlockfile: - case LibFunc_ftrylockfile: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_ferror: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F); - return Changed; - case LibFunc_fputc: - case LibFunc_fputc_unlocked: - case LibFunc_fstat: - case LibFunc_frexp: - case LibFunc_frexpf: - case LibFunc_frexpl: - case LibFunc_fstatvfs: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_fgets: - case LibFunc_fgets_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc_fread: - case LibFunc_fread_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 3); - return Changed; - case LibFunc_fwrite: - case LibFunc_fwrite_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 3); - // FIXME: readonly #1? 
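-    // Illustrative note (not in the original source): the FIXME above refers
-    // to fwrite's first parameter (the source buffer), which is only read and
-    // could plausibly also be marked readonly.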
- return Changed; - case LibFunc_fputs: - case LibFunc_fputs_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_fscanf: - case LibFunc_fprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_fgetpos: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_getc: - case LibFunc_getlogin_r: - case LibFunc_getc_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_getenv: - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_gets: - case LibFunc_getchar: - case LibFunc_getchar_unlocked: - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc_getitimer: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_getpwnam: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_ungetc: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_uname: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_unlink: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_unsetenv: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_utime: - case LibFunc_utimes: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_putc: - case LibFunc_putc_unlocked: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_puts: - case LibFunc_printf: - case LibFunc_perror: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_pread: - // May throw; "pread" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_pwrite: - // May throw; "pwrite" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_putchar: - case LibFunc_putchar_unlocked: - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc_popen: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_pclose: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_vscanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_vsscanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_vfscanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_valloc: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_vprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_vfprintf: - case LibFunc_vsprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_vsnprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc_open: - // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_opendir: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_tmpfile: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_times: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_htonl: - case LibFunc_htons: - case LibFunc_ntohl: - case LibFunc_ntohs: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAccessMemory(F); - return Changed; - case LibFunc_lstat: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_lchown: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_qsort: - // May throw; places call through function pointer. 
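-    // Illustrative note (not in the original source): only the comparator
-    // pointer (parameter 3) is marked nocapture; since qsort runs arbitrary
-    // user code through it, no memory or nounwind attributes can be safely
-    // inferred for qsort itself.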
- Changed |= setDoesNotCapture(F, 3); - return Changed; - case LibFunc_dunder_strdup: - case LibFunc_dunder_strndup: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_dunder_strtok_r: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_under_IO_getc: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_under_IO_putc: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_dunder_isoc99_scanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_stat64: - case LibFunc_lstat64: - case LibFunc_statvfs64: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_dunder_isoc99_sscanf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_fopen64: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 0); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc_fseeko64: - case LibFunc_ftello64: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - return Changed; - case LibFunc_tmpfile64: - Changed |= setDoesNotThrow(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - case LibFunc_fstat64: - case LibFunc_fstatvfs64: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_open64: - // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 0); - Changed |= setOnlyReadsMemory(F, 0); - return Changed; - case LibFunc_gettimeofday: - // Currently some platforms have the restrict keyword on the arguments to - // gettimeofday. To be conservative, do not add noalias to gettimeofday's - // arguments. 
- Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc_Znwj: // new(unsigned int) - case LibFunc_Znwm: // new(unsigned long) - case LibFunc_Znaj: // new[](unsigned int) - case LibFunc_Znam: // new[](unsigned long) - case LibFunc_msvc_new_int: // new(unsigned int) - case LibFunc_msvc_new_longlong: // new(unsigned long long) - case LibFunc_msvc_new_array_int: // new[](unsigned int) - case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) - // Operator new always returns a nonnull noalias pointer - Changed |= setRetNonNull(F); - Changed |= setRetDoesNotAlias(F); - return Changed; - // TODO: add LibFunc entries for: - // case LibFunc_memset_pattern4: - // case LibFunc_memset_pattern8: - case LibFunc_memset_pattern16: - Changed |= setOnlyAccessesArgMemory(F); - Changed |= setDoesNotCapture(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - // int __nvvm_reflect(const char *) - case LibFunc_nvvm_reflect: - Changed |= setDoesNotAccessMemory(F); - Changed |= setDoesNotThrow(F); - return Changed; - - default: - // FIXME: It'd be really nice to cover all the library functions we're - // aware of here. - return false; - } -} - -bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return false; - case Type::FloatTyID: - return TLI->has(FloatFn); - case Type::DoubleTyID: - return TLI->has(DoubleFn); - default: - return TLI->has(LongDoubleFn); - } -} - -StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && - "Cannot get name for unavailable function!"); - - switch (Ty->getTypeID()) { - case Type::HalfTyID: - llvm_unreachable("No name for HalfTy!"); - case Type::FloatTyID: - return TLI->getName(FloatFn); - case Type::DoubleTyID: - return TLI->getName(DoubleFn); - default: - return TLI->getName(LongDoubleFn); - } -} - -//- Emit LibCalls ------------------------------------------------------------// - -Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { - unsigned AS = V->getType()->getPointerAddressSpace(); - return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); -} - -static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType, - ArrayRef<Type *> ParamTypes, - ArrayRef<Value *> Operands, IRBuilder<> &B, - const TargetLibraryInfo *TLI, - bool IsVaArgs = false) { - if (!TLI->has(TheLibFunc)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FuncName = TLI->getName(TheLibFunc); - FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs); - FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType); - inferLibFuncAttributes(M, FuncName, *TLI); - CallInst *CI = B.CreateCall(Callee, Operands, FuncName); - if (const Function *F = - dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context), - B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI); -} - -Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - 
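-  // Illustrative note (not in the original source): this builds a call
-  // matching the C prototype `char *strchr(const char *s, int c)`, so the
-  // character argument is widened to an i32 constant below.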
Type *I8Ptr = B.getInt8PtrTy(); - Type *I32Ty = B.getInt32Ty(); - return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty}, - {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI); -} - -Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_strncmp, B.getInt32Ty(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); -} - -Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); - return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr}, - {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); -} - -Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); - return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr}, - {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI); -} - -Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); - return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()}, - {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); -} - -Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - Type *I8Ptr = B.getInt8PtrTy(); - return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()}, - {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI); -} - -Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_memcpy_chk)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - AttributeList AS; - AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex, - Attribute::NoUnwind); - LLVMContext &Context = B.GetInsertBlock()->getContext(); - FunctionCallee MemCpy = M->getOrInsertFunction( - "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(), - B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), - DL.getIntPtrType(Context)); - Dst = castToCStr(Dst, B); - Src = castToCStr(Src, B); - CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); - if (const Function *F = - dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_memchr, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)}, - {castToCStr(Ptr, B), Val, Len}, B, TLI); -} - -Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_memcmp, B.getInt32Ty(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); -} - -Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - LLVMContext &Context = B.GetInsertBlock()->getContext(); - return emitLibCall( - LibFunc_bcmp, 
B.getInt32Ty(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)}, - {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI); -} - -Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len, - IRBuilder<> &B, const TargetLibraryInfo *TLI) { - return emitLibCall( - LibFunc_memccpy, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()}, - {Ptr1, Ptr2, Val, Len}, B, TLI); -} - -Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt, - ArrayRef<Value *> VariadicArgs, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)}; - Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end()); - return emitLibCall(LibFunc_snprintf, B.getInt32Ty(), - {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()}, - Args, B, TLI, /*IsVaArgs=*/true); -} - -Value *llvm::emitSPrintf(Value *Dest, Value *Fmt, - ArrayRef<Value *> VariadicArgs, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)}; - Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end()); - return emitLibCall(LibFunc_sprintf, B.getInt32Ty(), - {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI, - /*IsVaArgs=*/true); -} - -Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy()}, - {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI); -} - -Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strlcpy, Size->getType(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); -} - -Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strlcat, Size->getType(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); -} - -Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()}, - {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI); -} - -Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList, - IRBuilder<> &B, const TargetLibraryInfo *TLI) { - return emitLibCall( - LibFunc_vsnprintf, B.getInt32Ty(), - {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()}, - {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI); -} - -Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, - IRBuilder<> &B, const TargetLibraryInfo *TLI) { - return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(), - {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()}, - {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI); -} - -/// Append a suffix to the function name according to the type of 'Op'. 
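-/// For example (illustrative note), a float operand turns "sin" into "sinf",
-/// while any non-double, non-float type gets the "l" suffix, as in "sinl".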
-static void appendTypeSuffix(Value *Op, StringRef &Name, - SmallString<20> &NameBuffer) { - if (!Op->getType()->isDoubleTy()) { - NameBuffer += Name; - - if (Op->getType()->isFloatTy()) - NameBuffer += 'f'; - else - NameBuffer += 'l'; - - Name = NameBuffer; - } -} - -static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name, - IRBuilder<> &B, - const AttributeList &Attrs) { - assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall"); - - Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = - M->getOrInsertFunction(Name, Op->getType(), Op->getType()); - CallInst *CI = B.CreateCall(Callee, Op, Name); - - // The incoming attribute set may have come from a speculatable intrinsic, but - // is being replaced with a library call which is not allowed to be - // speculatable. - CI->setAttributes(Attrs.removeAttribute(B.getContext(), - AttributeList::FunctionIndex, - Attribute::Speculatable)); - if (const Function *F = - dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, - const AttributeList &Attrs) { - SmallString<20> NameBuffer; - appendTypeSuffix(Op, Name, NameBuffer); - - return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); -} - -Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn, IRBuilder<> &B, - const AttributeList &Attrs) { - // Get the name of the function according to TLI. - StringRef Name = getUnaryFloatFn(TLI, Op->getType(), - DoubleFn, FloatFn, LongDoubleFn); - - return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); -} - -Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, - IRBuilder<> &B, const AttributeList &Attrs) { - assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); - - SmallString<20> NameBuffer; - appendTypeSuffix(Op1, Name, NameBuffer); - - Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = M->getOrInsertFunction( - Name, Op1->getType(), Op1->getType(), Op2->getType()); - CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); - CI->setAttributes(Attrs); - if (const Function *F = - dyn_cast<Function>(Callee.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_putchar)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef PutCharName = TLI->getName(LibFunc_putchar); - FunctionCallee PutChar = - M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty()); - inferLibFuncAttributes(M, PutCharName, *TLI); - CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, - B.getInt32Ty(), - /*isSigned*/true, - "chari"), - PutCharName); - - if (const Function *F = - dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -Value *llvm::emitPutS(Value *Str, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_puts)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef PutsName = TLI->getName(LibFunc_puts); - FunctionCallee PutS = - M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy()); - inferLibFuncAttributes(M, PutsName, *TLI); - CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName); - if (const Function *F = - 
dyn_cast<Function>(PutS.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputc)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FPutcName = TLI->getName(LibFunc_fputc); - FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), - B.getInt32Ty(), File->getType()); - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutcName, *TLI); - Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, - "chari"); - CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputc_unlocked)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked); - FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(), - B.getInt32Ty(), File->getType()); - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutcUnlockedName, *TLI); - Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari"); - CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputs)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FPutsName = TLI->getName(LibFunc_fputs); - FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), - B.getInt8PtrTy(), File->getType()); - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutsName, *TLI); - CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fputs_unlocked)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked); - FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(), - B.getInt8PtrTy(), File->getType()); - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FPutsUnlockedName, *TLI); - CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fwrite)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - LLVMContext &Context = B.GetInsertBlock()->getContext(); - StringRef FWriteName = TLI->getName(LibFunc_fwrite); - FunctionCallee F = M->getOrInsertFunction( - FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); - - 
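-  // Illustrative note (not in the original source): this mirrors the C
-  // prototype
-  //   size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *file);
-  // the call built below fixes nmemb to 1, so Size carries the whole byte
-  // count.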
if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FWriteName, *TLI); - CallInst *CI = - B.CreateCall(F, {castToCStr(Ptr, B), Size, - ConstantInt::get(DL.getIntPtrType(Context), 1), File}); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_malloc)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef MallocName = TLI->getName(LibFunc_malloc); - LLVMContext &Context = B.GetInsertBlock()->getContext(); - FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(), - DL.getIntPtrType(Context)); - inferLibFuncAttributes(M, MallocName, *TLI); - CallInst *CI = B.CreateCall(Malloc, Num, MallocName); - - if (const Function *F = - dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, - IRBuilder<> &B, const TargetLibraryInfo &TLI) { - if (!TLI.has(LibFunc_calloc)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef CallocName = TLI.getName(LibFunc_calloc); - const DataLayout &DL = M->getDataLayout(); - IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); - FunctionCallee Calloc = M->getOrInsertFunction( - CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType); - inferLibFuncAttributes(M, CallocName, TLI); - CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName); - - if (const auto *F = - dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, - IRBuilder<> &B, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fwrite_unlocked)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - LLVMContext &Context = B.GetInsertBlock()->getContext(); - StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked); - FunctionCallee F = M->getOrInsertFunction( - FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); - - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FWriteUnlockedName, *TLI); - CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fgetc_unlocked)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked); - FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(), - File->getType()); - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FGetCUnlockedName, *TLI); - CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File, - IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fgets_unlocked)) - return 
nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked); - FunctionCallee F = - M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(), - B.getInt8PtrTy(), B.getInt32Ty(), File->getType()); - inferLibFuncAttributes(M, FGetSUnlockedName, *TLI); - CallInst *CI = - B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} - -Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, - IRBuilder<> &B, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc_fread_unlocked)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - LLVMContext &Context = B.GetInsertBlock()->getContext(); - StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked); - FunctionCallee F = M->getOrInsertFunction( - FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); - - if (File->getType()->isPointerTy()) - inferLibFuncAttributes(M, FReadUnlockedName, *TLI); - CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); - - if (const Function *Fn = - dyn_cast<Function>(F.getCallee()->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); - return CI; -} diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp deleted file mode 100644 index df299f673f65..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ /dev/null @@ -1,474 +0,0 @@ -//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an optimization for div and rem on architectures that -// execute short instructions significantly faster than longer instructions. -// For example, on Intel Atom 32-bit divides are slow enough that during -// runtime it is profitable to check the value of the operands, and if they are -// positive and less than 256 use an unsigned 8-bit divide. 
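-// The emitted guard looks roughly like this (illustrative sketch at the C
-// level; the IR version ORs the operands and tests the high bits):
-//
-//   if (((a | b) >> 8) == 0)          // both operands fit in 8 bits
-//     q = (uint8_t)a / (uint8_t)b;    // fast narrow udiv
-//   else
-//     q = a / b;                      // original slow divide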
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/KnownBits.h" -#include <cassert> -#include <cstdint> - -using namespace llvm; - -#define DEBUG_TYPE "bypass-slow-division" - -namespace { - - struct QuotRemPair { - Value *Quotient; - Value *Remainder; - - QuotRemPair(Value *InQuotient, Value *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} - }; - - /// A quotient and remainder, plus a BB from which they logically "originate". - /// If you use Quotient or Remainder in a Phi node, you should use BB as its - /// corresponding predecessor. - struct QuotRemWithBB { - BasicBlock *BB = nullptr; - Value *Quotient = nullptr; - Value *Remainder = nullptr; - }; - -using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; -using BypassWidthsTy = DenseMap<unsigned, unsigned>; -using VisitedSetTy = SmallPtrSet<Instruction *, 4>; - -enum ValueRange { - /// Operand definitely fits into BypassType. No runtime checks are needed. - VALRNG_KNOWN_SHORT, - /// A runtime check is required, as value range is unknown. - VALRNG_UNKNOWN, - /// Operand is unlikely to fit into BypassType. The bypassing should be - /// disabled. - VALRNG_LIKELY_LONG -}; - -class FastDivInsertionTask { - bool IsValidTask = false; - Instruction *SlowDivOrRem = nullptr; - IntegerType *BypassType = nullptr; - BasicBlock *MainBB = nullptr; - - bool isHashLikeValue(Value *V, VisitedSetTy &Visited); - ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); - QuotRemWithBB createSlowBB(BasicBlock *Successor); - QuotRemWithBB createFastBB(BasicBlock *Successor); - QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, - BasicBlock *PhiBB); - Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); - Optional<QuotRemPair> insertFastDivAndRem(); - - bool isSignedOp() { - return SlowDivOrRem->getOpcode() == Instruction::SDiv || - SlowDivOrRem->getOpcode() == Instruction::SRem; - } - - bool isDivisionOp() { - return SlowDivOrRem->getOpcode() == Instruction::SDiv || - SlowDivOrRem->getOpcode() == Instruction::UDiv; - } - - Type *getSlowType() { return SlowDivOrRem->getType(); } - -public: - FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); - - Value *getReplacement(DivCacheTy &Cache); -}; - -} // end anonymous namespace - -FastDivInsertionTask::FastDivInsertionTask(Instruction *I, - const BypassWidthsTy &BypassWidths) { - switch (I->getOpcode()) { - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::URem: - case Instruction::SRem: - SlowDivOrRem = I; - break; - default: - // I is not a div/rem operation. - return; - } - - // Skip division on vector types. Only optimize integer instructions. - IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); - if (!SlowType) - return; - - // Skip if this bitwidth is not bypassed. 
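-  // Illustrative note (not in the original source): BypassWidths maps a slow
-  // division bit width to the narrower width worth trying at runtime, e.g. a
-  // hypothetical {64 -> 32} entry on subtargets where 32-bit division is much
-  // cheaper.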
- auto BI = BypassWidths.find(SlowType->getBitWidth()); - if (BI == BypassWidths.end()) - return; - - // Get type for div/rem instruction with bypass bitwidth. - IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - BypassType = BT; - - // The original basic block. - MainBB = I->getParent(); - - // The instruction is indeed a slow div or rem operation. - IsValidTask = true; -} - -/// Reuses a previously computed quotient or remainder from the current BB if -/// operands and operation are identical. Otherwise calls insertFastDivAndRem to -/// perform the optimization and caches the resulting quotient and remainder. -/// If no replacement can be generated, nullptr is returned. -Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { - // First, make sure that the task is valid. - if (!IsValidTask) - return nullptr; - - // Then, look for a value in Cache. - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - DivRemMapKey Key(isSignedOp(), Dividend, Divisor); - auto CacheI = Cache.find(Key); - - if (CacheI == Cache.end()) { - // If no previous instance exists, try to insert a fast div. - Optional<QuotRemPair> OptResult = insertFastDivAndRem(); - // Bail out if insertFastDivAndRem has failed. - if (!OptResult) - return nullptr; - CacheI = Cache.insert({Key, *OptResult}).first; - } - - QuotRemPair &Value = CacheI->second; - return isDivisionOp() ? Value.Quotient : Value.Remainder; -} - -/// Check if a value looks like a hash. -/// -/// The routine is expected to detect values computed using the most common hash -/// algorithms. Typically, hash computations end with one of the following -/// instructions: -/// -/// 1) MUL with a constant wider than BypassType -/// 2) XOR instruction -/// -/// And even if we are wrong and the value is not a hash, it is still quite -/// unlikely that such values will fit into BypassType. -/// -/// To detect string hash algorithms like FNV we have to look through PHI-nodes. -/// It is implemented as a depth-first search for values that look neither long -/// nor hash-like. -bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { - Instruction *I = dyn_cast<Instruction>(V); - if (!I) - return false; - - switch (I->getOpcode()) { - case Instruction::Xor: - return true; - case Instruction::Mul: { - // After Constant Hoisting pass, long constants may be represented as - // bitcast instructions. As a result, some constants may look like an - // instruction at first, and an additional check is necessary to find out if - // an operand is actually a constant. - Value *Op1 = I->getOperand(1); - ConstantInt *C = dyn_cast<ConstantInt>(Op1); - if (!C && isa<BitCastInst>(Op1)) - C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); - return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); - } - case Instruction::PHI: - // Stop IR traversal in case of pathological input code. This limits recursion - // depth. - if (Visited.size() >= 16) - return false; - // Do not visit nodes that have been visited already. We return true because - // it means that we couldn't find any value that doesn't look hash-like. - if (Visited.find(I) != Visited.end()) - return true; - Visited.insert(I); - return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { - // Ignore undef values as they probably don't affect the division - // operands.
- return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || - isa<UndefValue>(V); - }); - default: - return false; - } -} - -/// Check if an integer value fits into our bypass type. -ValueRange FastDivInsertionTask::getValueRange(Value *V, - VisitedSetTy &Visited) { - unsigned ShortLen = BypassType->getBitWidth(); - unsigned LongLen = V->getType()->getIntegerBitWidth(); - - assert(LongLen > ShortLen && "Value type must be wider than BypassType"); - unsigned HiBits = LongLen - ShortLen; - - const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - KnownBits Known(LongLen); - - computeKnownBits(V, Known, DL); - - if (Known.countMinLeadingZeros() >= HiBits) - return VALRNG_KNOWN_SHORT; - - if (Known.countMaxLeadingZeros() < HiBits) - return VALRNG_LIKELY_LONG; - - // Long integer divisions are often used in hashtable implementations. It's - // not worth bypassing such divisions because hash values are extremely - // unlikely to have enough leading zeros. The call below tries to detect - // values that are unlikely to fit BypassType (including hashes). - if (isHashLikeValue(V, Visited)) - return VALRNG_LIKELY_LONG; - - return VALRNG_UNKNOWN; -} - -/// Add new basic block for slow div and rem operations and put it before -/// SuccessorBB. -QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { - QuotRemWithBB DivRemPair; - DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", - MainBB->getParent(), SuccessorBB); - IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); - - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - if (isSignedOp()) { - DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); - DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); - } else { - DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); - DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); - } - - Builder.CreateBr(SuccessorBB); - return DivRemPair; -} - -/// Add new basic block for fast div and rem operations and put it before -/// SuccessorBB. -QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { - QuotRemWithBB DivRemPair; - DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", - MainBB->getParent(), SuccessorBB); - IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); - - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - Value *ShortDivisorV = - Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); - Value *ShortDividendV = - Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); - - // udiv/urem because this optimization only handles positive numbers. - Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); - Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); - DivRemPair.Quotient = - Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); - DivRemPair.Remainder = - Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); - Builder.CreateBr(SuccessorBB); - - return DivRemPair; -} - -/// Creates Phi nodes for result of Div and Rem. 
-QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, - QuotRemWithBB &RHS, - BasicBlock *PhiBB) { - IRBuilder<> Builder(PhiBB, PhiBB->begin()); - PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); - QuoPhi->addIncoming(LHS.Quotient, LHS.BB); - QuoPhi->addIncoming(RHS.Quotient, RHS.BB); - PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); - RemPhi->addIncoming(LHS.Remainder, LHS.BB); - RemPhi->addIncoming(RHS.Remainder, RHS.BB); - return QuotRemPair(QuoPhi, RemPhi); -} - -/// Creates a runtime check to test whether both the divisor and dividend fit -/// into BypassType. The check is inserted at the end of MainBB. A true return -/// value means that the operands fit. Either operand may be null if it -/// doesn't need a runtime check. -Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { - assert((Op1 || Op2) && "Nothing to check"); - IRBuilder<> Builder(MainBB, MainBB->end()); - - Value *OrV; - if (Op1 && Op2) - OrV = Builder.CreateOr(Op1, Op2); - else - OrV = Op1 ? Op1 : Op2; - - // BitMask is inverted to check if the operands are - // larger than the bypass type. - uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = Builder.CreateAnd(OrV, BitMask); - - // Compare operand values. - Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); - return Builder.CreateICmpEQ(AndV, ZeroV); -} - -/// Substitutes the div/rem instruction with code that checks the value of the -/// operands and uses a shorter, faster div/rem instruction when possible. -Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - VisitedSetTy SetL; - ValueRange DividendRange = getValueRange(Dividend, SetL); - if (DividendRange == VALRNG_LIKELY_LONG) - return None; - - VisitedSetTy SetR; - ValueRange DivisorRange = getValueRange(Divisor, SetR); - if (DivisorRange == VALRNG_LIKELY_LONG) - return None; - - bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); - bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); - - if (DividendShort && DivisorShort) { - // If both operands are known to be short then just replace the long - // division with a short one in-place. Since we're not introducing control - // flow in this case, narrowing the division is always a win, even if the - // divisor is a constant (and will later get replaced by a multiplication). - - IRBuilder<> Builder(SlowDivOrRem); - Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); - Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); - Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); - Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); - Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); - Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); - return QuotRemPair(ExtDiv, ExtRem); - } - - if (isa<ConstantInt>(Divisor)) { - // If the divisor is a constant, DAGCombiner will convert the division into a - // multiplication by a magic constant. It isn't clear if it is worth - // introducing control flow to get a narrower multiply. - return None; - } - - // After Constant Hoisting pass, long constants may be represented as - // bitcast instructions. As a result, some constants may look like an - // instruction at first, and an additional check is necessary to find out if - // an operand is actually a constant.
- if (auto *BCI = dyn_cast<BitCastInst>(Divisor)) - if (BCI->getParent() == SlowDivOrRem->getParent() && - isa<ConstantInt>(BCI->getOperand(0))) - return None; - - if (DividendShort && !isSignedOp()) { - // If the division is unsigned and Dividend is known to be short, then - // either - // 1) Divisor is less than or equal to Dividend, and the result can be - // computed with a short division. - // 2) Divisor is greater than Dividend. In this case, no division is needed - // at all: the quotient is 0 and the remainder is equal to Dividend. - // - // So instead of checking at runtime whether Divisor fits into BypassType, - // we emit a runtime check to differentiate between these two cases. This - // lets us entirely avoid a long div. - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Long; - Long.BB = MainBB; - Long.Quotient = ConstantInt::get(getSlowType(), 0); - Long.Remainder = Dividend; - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); - IRBuilder<> Builder(MainBB, MainBB->end()); - Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); - Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); - return Result; - } else { - // General case. Create both slow and fast div/rem pairs and choose one of - // them at runtime. - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemWithBB Slow = createSlowBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); - Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, - DivisorShort ? nullptr : Divisor); - IRBuilder<> Builder(MainBB, MainBB->end()); - Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); - return Result; - } -} - -/// This optimization identifies DIV/REM instructions in a BB that can be -/// profitably bypassed and carried out with a shorter, faster divide. -bool llvm::bypassSlowDivision(BasicBlock *BB, - const BypassWidthsTy &BypassWidths) { - DivCacheTy PerBBDivCache; - - bool MadeChange = false; - Instruction* Next = &*BB->begin(); - while (Next != nullptr) { - // We may add instructions immediately after I, but we want to skip over - // them. - Instruction* I = Next; - Next = Next->getNextNode(); - - FastDivInsertionTask Task(I, BypassWidths); - if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { - I->replaceAllUsesWith(Replacement); - I->eraseFromParent(); - MadeChange = true; - } - } - - // Above we eagerly create divs and rems, as pairs, so that we can efficiently - // create divrem machine instructions. Now erase any unused divs / rems so we - // don't leave extra instructions sitting around.
- for (auto &KV : PerBBDivCache) - for (Value *V : {KV.second.Quotient, KV.second.Remainder}) - RecursivelyDeleteTriviallyDeadInstructions(V); - - return MadeChange; -} diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp deleted file mode 100644 index f04d76e70c0d..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ /dev/null @@ -1,461 +0,0 @@ -//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements utilities useful for promoting indirect call sites to -// direct call sites. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/CallPromotionUtils.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "call-promotion-utils" - -/// Fix-up phi nodes in an invoke instruction's normal destination. -/// -/// After versioning an invoke instruction, values coming from the original -/// block will now be coming from the "merge" block. For example, in the code -/// below: -/// -/// then_bb: -/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// else_bb: -/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// merge_bb: -/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ] -/// br %normal_dst -/// -/// normal_dst: -/// %t3 = phi i32 [ %x, %orig_bb ], ... -/// -/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in -/// "normal_dst" must be fixed to refer to "merge_bb": -/// -/// normal_dst: -/// %t3 = phi i32 [ %x, %merge_bb ], ... -/// -static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock, - BasicBlock *MergeBlock) { - for (PHINode &Phi : Invoke->getNormalDest()->phis()) { - int Idx = Phi.getBasicBlockIndex(OrigBlock); - if (Idx == -1) - continue; - Phi.setIncomingBlock(Idx, MergeBlock); - } -} - -/// Fix-up phi nodes in an invoke instruction's unwind destination. -/// -/// After versioning an invoke instruction, values coming from the original -/// block will now be coming from either the "then" block or the "else" block. -/// For example, in the code below: -/// -/// then_bb: -/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// else_bb: -/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// unwind_dst: -/// %t3 = phi i32 [ %x, %orig_bb ], ... -/// -/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in -/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb": -/// -/// unwind_dst: -/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ... -/// -static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock, - BasicBlock *ThenBlock, - BasicBlock *ElseBlock) { - for (PHINode &Phi : Invoke->getUnwindDest()->phis()) { - int Idx = Phi.getBasicBlockIndex(OrigBlock); - if (Idx == -1) - continue; - auto *V = Phi.getIncomingValue(Idx); - Phi.setIncomingBlock(Idx, ThenBlock); - Phi.addIncoming(V, ElseBlock); - } -} - -/// Create a phi node for the returned value of a call or invoke instruction. 
-/// -/// After versioning a call or invoke instruction that returns a value, we have -/// to merge the value of the original and new instructions. We do this by -/// creating a phi node and replacing uses of the original instruction with this -/// phi node. -/// -/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is -/// defined in "then_bb", we create the following phi node: -/// -/// ; Uses of the original instruction are replaced by uses of the phi node. -/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ], -/// -static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, - BasicBlock *MergeBlock, IRBuilder<> &Builder) { - - if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty()) - return; - - Builder.SetInsertPoint(&MergeBlock->front()); - PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); - SmallVector<User *, 16> UsersToUpdate; - for (User *U : OrigInst->users()) - UsersToUpdate.push_back(U); - for (User *U : UsersToUpdate) - U->replaceUsesOfWith(OrigInst, Phi); - Phi->addIncoming(OrigInst, OrigInst->getParent()); - Phi->addIncoming(NewInst, NewInst->getParent()); -} - -/// Cast a call or invoke instruction to the given type. -/// -/// When promoting a call site, the return type of the call site might not match -/// that of the callee. If this is the case, we have to cast the returned value -/// to the correct type. The location of the cast depends on if we have a call -/// or invoke instruction. -/// -/// For example, if the call instruction below requires a bitcast after -/// promotion: -/// -/// orig_bb: -/// %t0 = call i32 @func() -/// ... -/// -/// The bitcast is placed after the call instruction: -/// -/// orig_bb: -/// ; Uses of the original return value are replaced by uses of the bitcast. -/// %t0 = call i32 @func() -/// %t1 = bitcast i32 %t0 to ... -/// ... -/// -/// A similar transformation is performed for invoke instructions. However, -/// since invokes are terminating, a new block is created for the bitcast. For -/// example, if the invoke instruction below requires a bitcast after promotion: -/// -/// orig_bb: -/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst -/// -/// The edge between the original block and the invoke's normal destination is -/// split, and the bitcast is placed there: -/// -/// orig_bb: -/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst -/// -/// split_bb: -/// ; Uses of the original return value are replaced by uses of the bitcast. -/// %t1 = bitcast i32 %t0 to ... -/// br label %normal_dst -/// -static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) { - - // Save the users of the calling instruction. These uses will be changed to - // use the bitcast after we create it. - SmallVector<User *, 16> UsersToUpdate; - for (User *U : CS.getInstruction()->users()) - UsersToUpdate.push_back(U); - - // Determine an appropriate location to create the bitcast for the return - // value. The location depends on if we have a call or invoke instruction. - Instruction *InsertBefore = nullptr; - if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) - InsertBefore = - &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front(); - else - InsertBefore = &*std::next(CS.getInstruction()->getIterator()); - - // Bitcast the return value to the correct type. 
- auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "", - InsertBefore); - if (RetBitCast) - *RetBitCast = Cast; - - // Replace all the original uses of the calling instruction with the bitcast. - for (User *U : UsersToUpdate) - U->replaceUsesOfWith(CS.getInstruction(), Cast); -} - -/// Predicate and clone the given call site. -/// -/// This function creates an if-then-else structure at the location of the call -/// site. The "if" condition compares the call site's called value to the given -/// callee. The original call site is moved into the "else" block, and a clone -/// of the call site is placed in the "then" block. The cloned instruction is -/// returned. -/// -/// For example, the call instruction below: -/// -/// orig_bb: -/// %t0 = call i32 %ptr() -/// ... -/// -/// Is replaced by the following: -/// -/// orig_bb: -/// %cond = icmp eq i32 ()* %ptr, @func -/// br i1 %cond, %then_bb, %else_bb -/// -/// then_bb: -/// ; The clone of the original call instruction is placed in the "then" -/// ; block. It is not yet promoted. -/// %t1 = call i32 %ptr() -/// br merge_bb -/// -/// else_bb: -/// ; The original call instruction is moved to the "else" block. -/// %t0 = call i32 %ptr() -/// br merge_bb -/// -/// merge_bb: -/// ; Uses of the original call instruction are replaced by uses of the phi -/// ; node. -/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] -/// ... -/// -/// A similar transformation is performed for invoke instructions. However, -/// since invokes are terminating, more work is required. For example, the -/// invoke instruction below: -/// -/// orig_bb: -/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst -/// -/// Is replaced by the following: -/// -/// orig_bb: -/// %cond = icmp eq i32 ()* %ptr, @func -/// br i1 %cond, %then_bb, %else_bb -/// -/// then_bb: -/// ; The clone of the original invoke instruction is placed in the "then" -/// ; block, and its normal destination is set to the "merge" block. It is -/// ; not yet promoted. -/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// else_bb: -/// ; The original invoke instruction is moved into the "else" block, and -/// ; its normal destination is set to the "merge" block. -/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst -/// -/// merge_bb: -/// ; Uses of the original invoke instruction are replaced by uses of the -/// ; phi node, and the merge block branches to the normal destination. -/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] -/// br %normal_dst -/// -static Instruction *versionCallSite(CallSite CS, Value *Callee, - MDNode *BranchWeights) { - - IRBuilder<> Builder(CS.getInstruction()); - Instruction *OrigInst = CS.getInstruction(); - BasicBlock *OrigBlock = OrigInst->getParent(); - - // Create the compare. The called value and callee must have the same type to - // be compared. - if (CS.getCalledValue()->getType() != Callee->getType()) - Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType()); - auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee); - - // Create an if-then-else structure. The original instruction is moved into - // the "else" block, and a clone of the original instruction is placed in the - // "then" block.
- Instruction *ThenTerm = nullptr; - Instruction *ElseTerm = nullptr; - SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm, - BranchWeights); - BasicBlock *ThenBlock = ThenTerm->getParent(); - BasicBlock *ElseBlock = ElseTerm->getParent(); - BasicBlock *MergeBlock = OrigInst->getParent(); - - ThenBlock->setName("if.true.direct_targ"); - ElseBlock->setName("if.false.orig_indirect"); - MergeBlock->setName("if.end.icp"); - - Instruction *NewInst = OrigInst->clone(); - OrigInst->moveBefore(ElseTerm); - NewInst->insertBefore(ThenTerm); - - // If the original call site is an invoke instruction, we have extra work to - // do since invoke instructions are terminating. We have to fix-up phi nodes - // in the invoke's normal and unwind destinations. - if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) { - auto *NewInvoke = cast<InvokeInst>(NewInst); - - // Invoke instructions are terminating, so we don't need the terminator - // instructions that were just created. - ThenTerm->eraseFromParent(); - ElseTerm->eraseFromParent(); - - // Branch from the "merge" block to the original normal destination. - Builder.SetInsertPoint(MergeBlock); - Builder.CreateBr(OrigInvoke->getNormalDest()); - - // Fix-up phi nodes in the original invoke's normal and unwind destinations. - fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock); - fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock); - - // Now set the normal destinations of the invoke instructions to be the - // "merge" block. - OrigInvoke->setNormalDest(MergeBlock); - NewInvoke->setNormalDest(MergeBlock); - } - - // Create a phi node for the returned value of the call site. - createRetPHINode(OrigInst, NewInst, MergeBlock, Builder); - - return NewInst; -} - -bool llvm::isLegalToPromote(CallSite CS, Function *Callee, - const char **FailureReason) { - assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted"); - - auto &DL = Callee->getParent()->getDataLayout(); - - // Check the return type. The callee's return value type must be bitcast - // compatible with the call site's type. - Type *CallRetTy = CS.getInstruction()->getType(); - Type *FuncRetTy = Callee->getReturnType(); - if (CallRetTy != FuncRetTy) - if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) { - if (FailureReason) - *FailureReason = "Return type mismatch"; - return false; - } - - // The number of formal arguments of the callee. - unsigned NumParams = Callee->getFunctionType()->getNumParams(); - - // Check the number of arguments. The callee and call site must agree on the - // number of arguments. - if (CS.arg_size() != NumParams && !Callee->isVarArg()) { - if (FailureReason) - *FailureReason = "The number of arguments mismatch"; - return false; - } - - // Check the argument types. The callee's formal argument types must be - // bitcast compatible with the corresponding actual argument types of the call - // site. 
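That compatibility test reduces to CastInst::isBitOrNoopPointerCastable; as a minimal sketch (hypothetical helper, same check for return and argument types):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Returns true when a value of type From can be used where To is expected,
// possibly via a no-op bitcast (hypothetical helper mirroring the checks
// in isLegalToPromote).
static bool typesCompatible(Type *From, Type *To, const DataLayout &DL) {
  return From == To || CastInst::isBitOrNoopPointerCastable(From, To, DL);
}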
- for (unsigned I = 0; I < NumParams; ++I) { - Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I); - Type *ActualTy = CS.getArgument(I)->getType(); - if (FormalTy == ActualTy) - continue; - if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) { - if (FailureReason) - *FailureReason = "Argument type mismatch"; - return false; - } - } - - return true; -} - -Instruction *llvm::promoteCall(CallSite CS, Function *Callee, - CastInst **RetBitCast) { - assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted"); - - // Set the called function of the call site to be the given callee (but don't - // change the type). - cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee); - - // Since the call site will no longer be indirect, we must clear metadata that - // is only appropriate for indirect calls. This includes !prof and !callees - // metadata. - CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr); - CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr); - - // If the function type of the call site matches that of the callee, no - // additional work is required. - if (CS.getFunctionType() == Callee->getFunctionType()) - return CS.getInstruction(); - - // Save the return types of the call site and callee. - Type *CallSiteRetTy = CS.getInstruction()->getType(); - Type *CalleeRetTy = Callee->getReturnType(); - - // Change the function type of the call site to match that of the callee. - CS.mutateFunctionType(Callee->getFunctionType()); - - // Inspect the arguments of the call site. If an argument's type doesn't - // match the corresponding formal argument's type in the callee, bitcast it - // to the correct type. - auto CalleeType = Callee->getFunctionType(); - auto CalleeParamNum = CalleeType->getNumParams(); - - LLVMContext &Ctx = Callee->getContext(); - const AttributeList &CallerPAL = CS.getAttributes(); - // The new list of argument attributes. - SmallVector<AttributeSet, 4> NewArgAttrs; - bool AttributeChanged = false; - - for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { - auto *Arg = CS.getArgument(ArgNo); - Type *FormalTy = CalleeType->getParamType(ArgNo); - Type *ActualTy = Arg->getType(); - if (FormalTy != ActualTy) { - auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "", - CS.getInstruction()); - CS.setArgument(ArgNo, Cast); - - // Remove any incompatible attributes for the argument. - AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo)); - ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy)); - - // If byval is used, this must be a pointer type, and the byval type must - // match the element type. Update it if present. - if (ArgAttrs.getByValType()) { - Type *NewTy = Callee->getParamByValType(ArgNo); - ArgAttrs.addByValAttr( - NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType()); - } - - NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs)); - AttributeChanged = true; - } else - NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo)); - } - - // If the return type of the call site doesn't match that of the callee, cast - // the returned value to the appropriate type. - // Remove any incompatible return value attribute. - AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex); - if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) { - createRetBitCast(CS, CallSiteRetTy, RetBitCast); - RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy)); - AttributeChanged = true; - } - - // Set the new call site attributes.
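Seen from a caller, the entry points of this file combine as follows (hypothetical wrapper; passing nullptr branch weights simply omits the profile metadata):

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

// Hypothetical wrapper: version-and-promote CS to Target if legal.
static bool tryPromote(CallSite CS, Function *Target) {
  if (CS.getCalledFunction()) // already a direct call
    return false;
  const char *Reason = nullptr;
  if (!isLegalToPromote(CS, Target, &Reason))
    return false;
  promoteCallWithIfThenElse(CS, Target, /*BranchWeights=*/nullptr);
  return true;
}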
- if (AttributeChanged) - CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(), - AttributeSet::get(Ctx, RAttrs), - NewArgAttrs)); - - return CS.getInstruction(); -} - -Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee, - MDNode *BranchWeights) { - - // Version the indirect call site. If the called value is equal to the given - // callee, 'NewInst' will be executed; otherwise, the original call site will - // be executed. - Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights); - - // Promote 'NewInst' so that it directly calls the desired function. - return promoteCall(CallSite(NewInst), Callee); -} - -#undef DEBUG_TYPE diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp deleted file mode 100644 index 455fcbb1cf98..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Currently this file implements partial alias canonicalization, to -// flatten chains of aliases (also done by GlobalOpt, but not enabled for -// -O0 compiles). E.g. -// @a = alias i8, i8 *@b -// @b = alias i8, i8 *@g -// -// will be converted to: -// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g -// @b = alias i8, i8 *@g -// -// Eventually this file will implement full alias canonicalization, so that -// all aliasees are private anonymous values. E.g. -// @a = alias i8, i8 *@g -// @g = global i8 0 -// -// will be converted to: -// @0 = private global i8 0 -// @a = alias i8, i8* @0 -// @g = alias i8, i8* @0 -// -// This simplifies optimization and ThinLTO linking of the original symbols. -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/CanonicalizeAliases.h" - -#include "llvm/IR/Operator.h" -#include "llvm/IR/ValueHandle.h" - -using namespace llvm; - -namespace { - -static Constant *canonicalizeAlias(Constant *C, bool &Changed) { - if (auto *GA = dyn_cast<GlobalAlias>(C)) { - auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed); - if (NewAliasee != GA->getAliasee()) { - GA->setAliasee(NewAliasee); - Changed = true; - } - return NewAliasee; - } - - auto *CE = dyn_cast<ConstantExpr>(C); - if (!CE) - return C; - - std::vector<Constant *> Ops; - for (Use &U : CE->operands()) - Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed)); - return CE->getWithOperands(Ops); -} - -/// Convert aliases to canonical form. -static bool canonicalizeAliases(Module &M) { - bool Changed = false; - for (auto &GA : M.aliases()) - canonicalizeAlias(&GA, Changed); - return Changed; -} - -// Legacy pass that canonicalizes aliases.
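The new-pass-manager wrapper defined at the end of this file can also be invoked directly (a minimal sketch; the pass never queries its analysis manager, so an empty one suffices here):

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"

using namespace llvm;

// Sketch: flatten alias chains in M outside a full pipeline.
static void flattenAliases(Module &M) {
  ModuleAnalysisManager MAM;
  CanonicalizeAliasesPass().run(M, MAM);
}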
-class CanonicalizeAliasesLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Canonicalize Aliases"; } - - explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return canonicalizeAliases(M); } -}; -char CanonicalizeAliasesLegacyPass::ID = 0; - -} // anonymous namespace - -PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, - ModuleAnalysisManager &AM) { - if (!canonicalizeAliases(M)) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} - -INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) -INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) - -namespace llvm { -ModulePass *createCanonicalizeAliasesPass() { - return new CanonicalizeAliasesLegacyPass(); -} -} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp deleted file mode 100644 index 1026c9d37038..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ /dev/null @@ -1,863 +0,0 @@ -//===- CloneFunction.cpp - Clone a function into another function ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the CloneFunctionInto interface, which is used as the -// low-level function cloner. This is used by the CloneFunction and function -// inliner to do the dirty work of copying the body of a function around. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <map> -using namespace llvm; - -/// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, - const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo, - DebugInfoFinder *DIFinder) { - DenseMap<const MDNode *, MDNode *> Cache; - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); - if (BB->hasName()) - NewBB->setName(BB->getName() + NameSuffix); - - bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - Module *TheModule = F ? F->getParent() : nullptr; - - // Loop over all instructions, and copy them over. 
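Callers usually pair a block clone with a remap step, since the cloned instructions still reference the original values (a minimal sketch using remapInstructionsInBlocks, defined later in this file):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Sketch: copy BB into its own function; fix intra-block references so the
// clone's instructions use the cloned definitions.
static BasicBlock *copyBlock(BasicBlock *BB) {
  ValueToValueMapTy VMap;
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".copy", BB->getParent());
  SmallVector<BasicBlock *, 1> Blocks;
  Blocks.push_back(NewBB);
  remapInstructionsInBlocks(Blocks, VMap);
  return NewBB;
}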
- for (const Instruction &I : *BB) { - if (DIFinder && TheModule) - DIFinder->processInstruction(*TheModule, I); - - Instruction *NewInst = I.clone(); - if (I.hasName()) - NewInst->setName(I.getName() + NameSuffix); - NewBB->getInstList().push_back(NewInst); - VMap[&I] = NewInst; // Add instruction map to value. - - hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I)); - if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { - if (isa<ConstantInt>(AI->getArraySize())) - hasStaticAllocas = true; - else - hasDynamicAllocas = true; - } - } - - if (CodeInfo) { - CodeInfo->ContainsCalls |= hasCalls; - CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && - BB != &BB->getParent()->getEntryBlock(); - } - return NewBB; -} - -// Clone OldFunc into NewFunc, transforming the old arguments into references to -// VMap values. -// -void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(NameSuffix && "NameSuffix cannot be null!"); - -#ifndef NDEBUG - for (const Argument &I : OldFunc->args()) - assert(VMap.count(&I) && "No mapping from source argument specified!"); -#endif - - // Copy all attributes other than those stored in the AttributeList. We need - // to remap the parameter indices of the AttributeList. - AttributeList NewAttrs = NewFunc->getAttributes(); - NewFunc->copyAttributesFrom(OldFunc); - NewFunc->setAttributes(NewAttrs); - - // Fix up the personality function that got copied over. - if (OldFunc->hasPersonalityFn()) - NewFunc->setPersonalityFn( - MapValue(OldFunc->getPersonalityFn(), VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); - - SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size()); - AttributeList OldAttrs = OldFunc->getAttributes(); - - // Clone any argument attributes that are present in the VMap. - for (const Argument &OldArg : OldFunc->args()) { - if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { - NewArgAttrs[NewArg->getArgNo()] = - OldAttrs.getParamAttributes(OldArg.getArgNo()); - } - } - - NewFunc->setAttributes( - AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), - OldAttrs.getRetAttributes(), NewArgAttrs)); - - bool MustCloneSP = - OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); - DISubprogram *SP = OldFunc->getSubprogram(); - if (SP) { - assert(!MustCloneSP || ModuleLevelChanges); - // Add mappings for some DebugInfo nodes that we don't want duplicated - // even if they're distinct. - auto &MD = VMap.MD(); - MD[SP->getUnit()].reset(SP->getUnit()); - MD[SP->getType()].reset(SP->getType()); - MD[SP->getFile()].reset(SP->getFile()); - // If we're not cloning into the same module, no need to clone the - // subprogram - if (!MustCloneSP) - MD[SP].reset(SP); - } - - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) { - NewFunc->addMetadata( - MD.first, - *MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer)); - } - - // When we remap instructions, we want to avoid duplicating inlined - // DISubprograms, so record all subprograms we find as we duplicate - // instructions and then freeze them in the MD map. 
- // We also record information about dbg.value and dbg.declare to avoid - // duplicating the types. - DebugInfoFinder DIFinder; - - // Loop over all of the basic blocks in the function, cloning them as - // appropriate. Note that we save BE this way in order to handle cloning of - // recursive functions into themselves. - // - for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - - // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, - ModuleLevelChanges ? &DIFinder : nullptr); - - // Add basic block mapping. - VMap[&BB] = CBB; - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - if (BB.hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), - const_cast<BasicBlock*>(&BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); - } - - // Note return instructions for the caller. - if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) - Returns.push_back(RI); - } - - for (DISubprogram *ISP : DIFinder.subprograms()) - if (ISP != SP) - VMap.MD()[ISP].reset(ISP); - - for (DICompileUnit *CU : DIFinder.compile_units()) - VMap.MD()[CU].reset(CU); - - for (DIType *Type : DIFinder.types()) - VMap.MD()[Type].reset(Type); - - // Loop over all of the instructions in the function, fixing up operand - // references as we go. This uses VMap to do all the hard work. - for (Function::iterator BB = - cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), - BE = NewFunc->end(); - BB != BE; ++BB) - // Loop over all instructions, fixing each one as we find it... - for (Instruction &II : *BB) - RemapInstruction(&II, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); -} - -/// Return a copy of the specified function and add it to that function's -/// module. Also, any references specified in the VMap are changed to refer to -/// their mapped value instead of the original one. If any of the arguments to -/// the function are in the VMap, the arguments are deleted from the resultant -/// function. The VMap is updated to include mappings from all of the -/// instructions and basicblocks in the function from their old to new values. -/// -Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, - ClonedCodeInfo *CodeInfo) { - std::vector<Type*> ArgTypes; - - // The user might be deleting arguments to the function by specifying them in - // the VMap. If so, we need to not add the arguments to the arg ty vector - // - for (const Argument &I : F->args()) - if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet? - ArgTypes.push_back(I.getType()); - - // Create a new function type... - FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), - ArgTypes, F->getFunctionType()->isVarArg()); - - // Create the new function... - Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(), - F->getName(), F->getParent()); - - // Loop over the arguments, copying the names of the mapped arguments over... 
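The argument-deletion contract described above enables signature-narrowing clones, e.g. (hypothetical caller; assumes F has at least one parameter, and Replacement must be valid at every former use of that parameter):

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Sketch: clone F, replacing its first parameter with a fixed value and
// dropping that parameter from the clone's signature.
static Function *cloneWithoutFirstArg(Function *F, Value *Replacement) {
  ValueToValueMapTy VMap;
  VMap[&*F->arg_begin()] = Replacement; // pre-mapped args are not re-created
  return CloneFunction(F, VMap);
}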
- Function::arg_iterator DestI = NewF->arg_begin(); - for (const Argument & I : F->args()) - if (VMap.count(&I) == 0) { // Is this argument preserved? - DestI->setName(I.getName()); // Copy the name over... - VMap[&I] = &*DestI++; // Add mapping to VMap - } - - SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", - CodeInfo); - - return NewF; -} - - - -namespace { - /// This is a private class used to implement CloneAndPruneFunctionInto. - struct PruningFunctionCloner { - Function *NewFunc; - const Function *OldFunc; - ValueToValueMapTy &VMap; - bool ModuleLevelChanges; - const char *NameSuffix; - ClonedCodeInfo *CodeInfo; - - public: - PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - ValueToValueMapTy &valueMap, bool moduleLevelChanges, - const char *nameSuffix, ClonedCodeInfo *codeInfo) - : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), - ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), - CodeInfo(codeInfo) {} - - /// The specified block is found to be reachable, clone it and - /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, - BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone); - }; -} - -/// The specified block is found to be reachable, clone it and -/// anything that it can reach. -void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, - BasicBlock::const_iterator StartingInst, - std::vector<const BasicBlock*> &ToClone){ - WeakTrackingVH &BBEntry = VMap[BB]; - - // Have we already cloned this block? - if (BBEntry) return; - - // Nope, clone it now. - BasicBlock *NewBB; - BBEntry = NewBB = BasicBlock::Create(BB->getContext()); - if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); - - // It is only legal to clone a function if a block address within that - // function is never referenced outside of the function. Given that, we - // want to map block addresses from the old function to block addresses in - // the clone. (This is different from the generic ValueMapper - // implementation, which generates an invalid blockaddress when - // cloning a function.) - // - // Note that we don't need to fix the mapping for unreachable blocks; - // the default mapping there is safe. - if (BB->hasAddressTaken()) { - Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), - const_cast<BasicBlock*>(BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); - } - - bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - - // Loop over all instructions, and copy them over, DCE'ing as we go. This - // loop doesn't include the terminator. - for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); - II != IE; ++II) { - - Instruction *NewInst = II->clone(); - - // Eagerly remap operands to the newly cloned instruction, except for PHI - // nodes for which we defer processing until we update the CFG. - if (!isa<PHINode>(NewInst)) { - RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); - - // If we can simplify this instruction to some other value, simply add - // a mapping to that value rather than inserting a new instruction into - // the basic block. - if (Value *V = - SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { - // On the off-chance that this simplifies to an instruction in the old - // function, map it back into the new function. 
- if (NewFunc != OldFunc) - if (Value *MappedV = VMap.lookup(V)) - V = MappedV; - - if (!NewInst->mayHaveSideEffects()) { - VMap[&*II] = V; - NewInst->deleteValue(); - continue; - } - } - } - - if (II->hasName()) - NewInst->setName(II->getName()+NameSuffix); - VMap[&*II] = NewInst; // Add instruction map to value. - NewBB->getInstList().push_back(NewInst); - hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); - - if (CodeInfo) - if (auto CS = ImmutableCallSite(&*II)) - if (CS.hasOperandBundles()) - CodeInfo->OperandBundleCallSites.push_back(NewInst); - - if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { - if (isa<ConstantInt>(AI->getArraySize())) - hasStaticAllocas = true; - else - hasDynamicAllocas = true; - } - } - - // Finally, clone over the terminator. - const Instruction *OldTI = BB->getTerminator(); - bool TerminatorDone = false; - if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { - if (BI->isConditional()) { - // If the condition was a known constant in the callee... - ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); - // Or is a known constant in the caller... - if (!Cond) { - Value *V = VMap.lookup(BI->getCondition()); - Cond = dyn_cast_or_null<ConstantInt>(V); - } - - // Constant fold to uncond branch! - if (Cond) { - BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); - VMap[OldTI] = BranchInst::Create(Dest, NewBB); - ToClone.push_back(Dest); - TerminatorDone = true; - } - } - } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { - // If switching on a value known constant in the caller. - ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); - if (!Cond) { // Or known constant after constant prop in the callee... - Value *V = VMap.lookup(SI->getCondition()); - Cond = dyn_cast_or_null<ConstantInt>(V); - } - if (Cond) { // Constant fold to uncond branch! - SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond); - BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); - VMap[OldTI] = BranchInst::Create(Dest, NewBB); - ToClone.push_back(Dest); - TerminatorDone = true; - } - } - - if (!TerminatorDone) { - Instruction *NewInst = OldTI->clone(); - if (OldTI->hasName()) - NewInst->setName(OldTI->getName()+NameSuffix); - NewBB->getInstList().push_back(NewInst); - VMap[OldTI] = NewInst; // Add instruction map to value. - - if (CodeInfo) - if (auto CS = ImmutableCallSite(OldTI)) - if (CS.hasOperandBundles()) - CodeInfo->OperandBundleCallSites.push_back(NewInst); - - // Recursively clone any reachable successor blocks. - const Instruction *TI = BB->getTerminator(); - for (const BasicBlock *Succ : successors(TI)) - ToClone.push_back(Succ); - } - - if (CodeInfo) { - CodeInfo->ContainsCalls |= hasCalls; - CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && - BB != &BB->getParent()->front(); - } -} - -/// This works like CloneAndPruneFunctionInto, except that it does not clone the -/// entire function. Instead it starts at an instruction provided by the caller -/// and copies (and prunes) only the code reachable from that instruction. 
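A caller-side sketch of the whole-function variant, CloneAndPruneFunctionInto (hypothetical helper; every source argument must be pre-mapped, as asserted below):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Sketch: make a pruned copy of F next to it in the same module.
static Function *clonePruned(Function *F) {
  Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
                                    F->getName() + ".pruned", F->getParent());
  ValueToValueMapTy VMap;
  auto NewArg = NewF->arg_begin();
  for (Argument &A : F->args())
    VMap[&A] = &*NewArg++;

  SmallVector<ReturnInst *, 8> Returns;
  CloneAndPruneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false,
                            Returns);
  return NewF;
}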
-void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, - const Instruction *StartingInst, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst *> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo) { - assert(NameSuffix && "NameSuffix cannot be null!"); - - ValueMapTypeRemapper *TypeMapper = nullptr; - ValueMaterializer *Materializer = nullptr; - -#ifndef NDEBUG - // If the cloning starts at the beginning of the function, verify that - // the function arguments are mapped. - if (!StartingInst) - for (const Argument &II : OldFunc->args()) - assert(VMap.count(&II) && "No mapping from source argument specified!"); -#endif - - PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo); - const BasicBlock *StartingBB; - if (StartingInst) - StartingBB = StartingInst->getParent(); - else { - StartingBB = &OldFunc->getEntryBlock(); - StartingInst = &StartingBB->front(); - } - - // Clone the entry block, and anything recursively reachable from it. - std::vector<const BasicBlock*> CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); - while (!CloneWorklist.empty()) { - const BasicBlock *BB = CloneWorklist.back(); - CloneWorklist.pop_back(); - PFC.CloneBlock(BB, BB->begin(), CloneWorklist); - } - - // Loop over all of the basic blocks in the old function. If the block was - // reachable, we have cloned it and the old block is now in the value map: - // insert it into the new function in the right order. If not, ignore it. - // - // Defer PHI resolution until rest of function is resolved. - SmallVector<const PHINode*, 16> PHIToResolve; - for (const BasicBlock &BI : *OldFunc) { - Value *V = VMap.lookup(&BI); - BasicBlock *NewBB = cast_or_null<BasicBlock>(V); - if (!NewBB) continue; // Dead block. - - // Add the new block to the new function. - NewFunc->getBasicBlockList().push_back(NewBB); - - // Handle PHI nodes specially, as we have to remove references to dead - // blocks. - for (const PHINode &PN : BI.phis()) { - // PHI nodes may have been remapped to non-PHI nodes by the caller or - // during the cloning process. - if (isa<PHINode>(VMap[&PN])) - PHIToResolve.push_back(&PN); - else - break; - } - - // Finally, remap the terminator instructions, as those can't be remapped - // until all BBs are mapped. - RemapInstruction(NewBB->getTerminator(), VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); - } - - // Defer PHI resolution until rest of function is resolved, PHI resolution - // requires the CFG to be up-to-date. - for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { - const PHINode *OPN = PHIToResolve[phino]; - unsigned NumPreds = OPN->getNumIncomingValues(); - const BasicBlock *OldBB = OPN->getParent(); - BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); - - // Map operands for blocks that are live and remove operands for blocks - // that are dead. - for (; phino != PHIToResolve.size() && - PHIToResolve[phino]->getParent() == OldBB; ++phino) { - OPN = PHIToResolve[phino]; - PHINode *PN = cast<PHINode>(VMap[OPN]); - for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { - Value *V = VMap.lookup(PN->getIncomingBlock(pred)); - if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { - Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap, - ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges); - assert(InVal && "Unknown input value?"); - PN->setIncomingValue(pred, InVal); - PN->setIncomingBlock(pred, MappedBlock); - } else { - PN->removeIncomingValue(pred, false); - --pred; // Revisit the next entry. - --e; - } - } - } - - // The loop above has removed PHI entries for those blocks that are dead - // and has updated others. However, if a block is live (i.e. copied over) - // but its terminator has been changed to not go to this block, then our - // phi nodes will have invalid entries. Update the PHI nodes in this - // case. - PHINode *PN = cast<PHINode>(NewBB->begin()); - NumPreds = pred_size(NewBB); - if (NumPreds != PN->getNumIncomingValues()) { - assert(NumPreds < PN->getNumIncomingValues()); - // Count how many times each predecessor comes to this block. - std::map<BasicBlock*, unsigned> PredCount; - for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); - PI != E; ++PI) - --PredCount[*PI]; - - // Figure out how many entries to remove from each PHI. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - ++PredCount[PN->getIncomingBlock(i)]; - - // At this point, the excess predecessor entries are positive in the - // map. Loop over all of the PHIs and remove excess predecessor - // entries. - BasicBlock::iterator I = NewBB->begin(); - for (; (PN = dyn_cast<PHINode>(I)); ++I) { - for (const auto &PCI : PredCount) { - BasicBlock *Pred = PCI.first; - for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) - PN->removeIncomingValue(Pred, false); - } - } - } - - // If the loops above have made these phi nodes have 0 or 1 operand, - // replace them with undef or the input value. We must do this for - // correctness, because 0-operand phis are not valid. - PN = cast<PHINode>(NewBB->begin()); - if (PN->getNumIncomingValues() == 0) { - BasicBlock::iterator I = NewBB->begin(); - BasicBlock::const_iterator OldI = OldBB->begin(); - while ((PN = dyn_cast<PHINode>(I++))) { - Value *NV = UndefValue::get(PN->getType()); - PN->replaceAllUsesWith(NV); - assert(VMap[&*OldI] == PN && "VMap mismatch"); - VMap[&*OldI] = NV; - PN->eraseFromParent(); - ++OldI; - } - } - } - - // Make a second pass over the PHINodes now that all of them have been - // remapped into the new function, simplifying the PHINode and performing any - // recursive simplifications exposed. This will transparently update the - // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce - // two PHINodes, the iteration over the old PHIs remains valid, and the - // mapping will just map us to the new node (which may not even be a PHI - // node). - const DataLayout &DL = NewFunc->getParent()->getDataLayout(); - SmallSetVector<const Value *, 8> Worklist; - for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) - if (isa<PHINode>(VMap[PHIToResolve[Idx]])) - Worklist.insert(PHIToResolve[Idx]); - - // Note that we must test the size on each iteration, the worklist can grow. - for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { - const Value *OrigV = Worklist[Idx]; - auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV)); - if (!I) - continue; - - // Skip over non-intrinsic callsites, we don't want to remove any nodes from - // the CGSCC. - CallSite CS = CallSite(I); - if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic()) - continue; - - // See if this instruction simplifies. 
- Value *SimpleV = SimplifyInstruction(I, DL); - if (!SimpleV) - continue; - - // Stash away all the uses of the old instruction so we can check them for - // recursive simplifications after a RAUW. This is cheaper than checking all - // uses of To on the recursive step in most cases. - for (const User *U : OrigV->users()) - Worklist.insert(cast<Instruction>(U)); - - // Replace the instruction with its simplified value. - I->replaceAllUsesWith(SimpleV); - - // If the original instruction had no side effects, remove it. - if (isInstructionTriviallyDead(I)) - I->eraseFromParent(); - else - VMap[OrigV] = I; - } - - // Now that the inlined function body has been fully constructed, go through - // and zap unconditional fall-through branches. This happens all the time when - // specializing code: code specialization turns conditional branches into - // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); - Function::iterator I = Begin; - while (I != NewFunc->end()) { - // We need to simplify conditional branches and switches with a constant - // operand. We try to prune these out when cloning, but if the - // simplification required looking through PHI nodes, those are only - // available after forming the full basic block. That may leave some here, - // and we still want to prune the dead code as early as possible. - // - // Do the folding before we check if the block is dead since we want code - // like - // bb: - // br i1 undef, label %bb, label %bb - // to be simplified to - // bb: - // br label %bb - // before we call I->getSinglePredecessor(). - ConstantFoldTerminator(&*I); - - // Check if this block has become dead during inlining or other - // simplifications. Note that the first block will appear dead, as it has - // not yet been wired up properly. - if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || - I->getSinglePredecessor() == &*I)) { - BasicBlock *DeadBB = &*I++; - DeleteDeadBlock(DeadBB); - continue; - } - - BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); - if (!BI || BI->isConditional()) { ++I; continue; } - - BasicBlock *Dest = BI->getSuccessor(0); - if (!Dest->getSinglePredecessor()) { - ++I; continue; - } - - // We shouldn't be able to get single-entry PHI nodes here, as instsimplify - // above should have zapped all of them.. - assert(!isa<PHINode>(Dest->begin())); - - // We know all single-entry PHI nodes in the inlined function have been - // removed, so we just need to splice the blocks. - BI->eraseFromParent(); - - // Make all PHI nodes that referred to Dest now refer to I as their source. - Dest->replaceAllUsesWith(&*I); - - // Move all the instructions in the succ to the pred. - I->getInstList().splice(I->end(), Dest->getInstList()); - - // Remove the dest block. - Dest->eraseFromParent(); - - // Do not increment I, iteratively merge all things this block branches to. - } - - // Make a final pass over the basic blocks from the old function to gather - // any return instructions which survived folding. We have to do this here - // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), - E = NewFunc->end(); - I != E; ++I) - if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) - Returns.push_back(RI); -} - - -/// This works exactly like CloneFunctionInto, -/// except that it does some simple constant prop and DCE on the fly. 
The -/// effect of this is to copy significantly less code in cases where (for -/// example) a function call with constant arguments is inlined, and those -/// constant arguments cause a significant amount of code in the callee to be -/// dead. Since this doesn't produce an exact copy of the input, it can't be -/// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, - ClonedCodeInfo *CodeInfo, - Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo); -} - -/// Remaps instructions in \p Blocks using the mapping in \p VMap. -void llvm::remapInstructionsInBlocks( - const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { - // Rewrite the code to refer to itself. - for (auto *BB : Blocks) - for (auto &Inst : *BB) - RemapInstruction(&Inst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); -} - -/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p -/// Blocks. -/// -/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block -/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. -Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, - Loop *OrigLoop, ValueToValueMapTy &VMap, - const Twine &NameSuffix, LoopInfo *LI, - DominatorTree *DT, - SmallVectorImpl<BasicBlock *> &Blocks) { - Function *F = OrigLoop->getHeader()->getParent(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - DenseMap<Loop *, Loop *> LMap; - - Loop *NewLoop = LI->AllocateLoop(); - LMap[OrigLoop] = NewLoop; - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); - - BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); - assert(OrigPH && "No preheader"); - BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); - // To rename the loop PHIs. - VMap[OrigPH] = NewPH; - Blocks.push_back(NewPH); - - // Update LoopInfo. - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewPH, *LI); - - // Update DominatorTree. - DT->addNewBlock(NewPH, LoopDomBB); - - for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) { - Loop *&NewLoop = LMap[CurLoop]; - if (!NewLoop) { - NewLoop = LI->AllocateLoop(); - - // Establish the parent/child relationship. - Loop *OrigParent = CurLoop->getParentLoop(); - assert(OrigParent && "Could not find the original parent loop"); - Loop *NewParentLoop = LMap[OrigParent]; - assert(NewParentLoop && "Could not find the new parent loop"); - - NewParentLoop->addChildLoop(NewLoop); - } - } - - for (BasicBlock *BB : OrigLoop->getBlocks()) { - Loop *CurLoop = LI->getLoopFor(BB); - Loop *&NewLoop = LMap[CurLoop]; - assert(NewLoop && "Expecting new loop to be allocated"); - - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); - VMap[BB] = NewBB; - - // Update LoopInfo. - NewLoop->addBasicBlockToLoop(NewBB, *LI); - if (BB == CurLoop->getHeader()) - NewLoop->moveToHeader(NewBB); - - // Add DominatorTree node. After seeing all blocks, update to correct - // IDom. - DT->addNewBlock(NewBB, NewPH); - - Blocks.push_back(NewBB); - } - - for (BasicBlock *BB : OrigLoop->getBlocks()) { - // Update DominatorTree. 
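// [Illustrative aside, not part of the deleted file] Every cloned block above
// was registered with NewPH as a placeholder immediate dominator; the loop
// below repairs that by translating each original IDom through VMap. For a
// hypothetical loop where OrigHeader dominates OrigLatch, this rewrites
//
//   idom(NewLatch): NewPH  ->  NewHeader
//
// so the cloned dominator subtree mirrors the original one.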
- BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]), - cast<BasicBlock>(VMap[IDomBB])); - } - - // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), - NewPH); - F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), - NewLoop->getHeader()->getIterator(), F->end()); - - return NewLoop; -} - -/// Duplicate non-Phi instructions from the beginning of block up to -/// StopAt instruction into a split block between BB and its predecessor. -BasicBlock *llvm::DuplicateInstructionsInSplitBetween( - BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, - ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) { - - assert(count(successors(PredBB), BB) == 1 && - "There must be a single edge between PredBB and BB!"); - // We are going to have to map operands from the original BB block to the new - // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to - // account for entry from PredBB. - BasicBlock::iterator BI = BB->begin(); - for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - - BasicBlock *NewBB = SplitEdge(PredBB, BB); - NewBB->setName(PredBB->getName() + ".split"); - Instruction *NewTerm = NewBB->getTerminator(); - - // FIXME: SplitEdge does not yet take a DTU, so we include the split edge - // in the update set here. - DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB}, - {DominatorTree::Insert, PredBB, NewBB}, - {DominatorTree::Insert, NewBB, BB}}); - - // Clone the non-phi instructions of BB into NewBB, keeping track of the - // mapping and using it to remap operands in the cloned instructions. - // Stop once we see the terminator too. This covers the case where BB's - // terminator gets replaced and StopAt == BB's terminator. - for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { - Instruction *New = BI->clone(); - New->setName(BI->getName()); - New->insertBefore(NewTerm); - ValueMapping[&*BI] = New; - - // Remap operands to patch up intra-block references. - for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) - if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { - auto I = ValueMapping.find(Inst); - if (I != ValueMapping.end()) - New->setOperand(i, I->second); - } - } - - return NewBB; -} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp deleted file mode 100644 index 7ddf59becba9..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ /dev/null @@ -1,202 +0,0 @@ -//===- CloneModule.cpp - Clone an entire module ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the CloneModule interface which makes a copy of an -// entire module. 
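// A minimal usage sketch (illustrative; not part of the original file):
//
//   ValueToValueMapTy VMap;
//   std::unique_ptr<Module> NewM = CloneModule(M, VMap);
//   // VMap now maps each original global, function, alias, and argument
//   // to its clone in NewM.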
-// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/Constant.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -using namespace llvm; - -static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { - const Comdat *SC = Src->getComdat(); - if (!SC) - return; - Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName()); - DC->setSelectionKind(SC->getSelectionKind()); - Dst->setComdat(DC); -} - -/// This is not as easy as it might seem because we have to worry about making -/// copies of global variables and functions, and making their (initializers and -/// references, respectively) refer to the right globals. -/// -std::unique_ptr<Module> llvm::CloneModule(const Module &M) { - // Create the value map that maps things from the old module over to the new - // module. - ValueToValueMapTy VMap; - return CloneModule(M, VMap); -} - -std::unique_ptr<Module> llvm::CloneModule(const Module &M, - ValueToValueMapTy &VMap) { - return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); -} - -std::unique_ptr<Module> llvm::CloneModule( - const Module &M, ValueToValueMapTy &VMap, - function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { - // First off, we need to create the new module. - std::unique_ptr<Module> New = - llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext()); - New->setSourceFileName(M.getSourceFileName()); - New->setDataLayout(M.getDataLayout()); - New->setTargetTriple(M.getTargetTriple()); - New->setModuleInlineAsm(M.getModuleInlineAsm()); - - // Loop over all of the global variables, making corresponding globals in the - // new module. Here we add them to the VMap and to the new Module. We - // don't worry about attributes or initializers, they will come later. - // - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - GlobalVariable *GV = new GlobalVariable(*New, - I->getValueType(), - I->isConstant(), I->getLinkage(), - (Constant*) nullptr, I->getName(), - (GlobalVariable*) nullptr, - I->getThreadLocalMode(), - I->getType()->getAddressSpace()); - GV->copyAttributesFrom(&*I); - VMap[&*I] = GV; - } - - // Loop over the functions in the module, making external functions as before - for (const Function &I : M) { - Function *NF = - Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(), - I.getAddressSpace(), I.getName(), New.get()); - NF->copyAttributesFrom(&I); - VMap[&I] = NF; - } - - // Loop over the aliases in the module - for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - if (!ShouldCloneDefinition(&*I)) { - // An alias cannot act as an external reference, so we need to create - // either a function or a global variable depending on the value type. - // FIXME: Once pointee types are gone we can probably pick one or the - // other. 
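// [Illustrative aside, not part of the deleted file] For example, an alias
//
//   @f_alias = alias void (), void ()* @f
//
// whose definition is not cloned must become a plain declaration
//
//   declare void @f_alias()
//
// in the new module, because LLVM IR has no way to declare an alias without
// an aliasee.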
- GlobalValue *GV; - if (I->getValueType()->isFunctionTy()) - GV = Function::Create(cast<FunctionType>(I->getValueType()), - GlobalValue::ExternalLinkage, - I->getAddressSpace(), I->getName(), New.get()); - else - GV = new GlobalVariable( - *New, I->getValueType(), false, GlobalValue::ExternalLinkage, - nullptr, I->getName(), nullptr, - I->getThreadLocalMode(), I->getType()->getAddressSpace()); - VMap[&*I] = GV; - // We do not copy attributes (mainly because copying between different - // kinds of globals is forbidden), but this is generally not required for - // correctness. - continue; - } - auto *GA = GlobalAlias::create(I->getValueType(), - I->getType()->getPointerAddressSpace(), - I->getLinkage(), I->getName(), New.get()); - GA->copyAttributesFrom(&*I); - VMap[&*I] = GA; - } - - // Now that all of the things that global variable initializer can refer to - // have been created, loop through and copy the global variable referrers - // over... We also set the attributes on the global now. - // - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (I->isDeclaration()) - continue; - - GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); - if (!ShouldCloneDefinition(&*I)) { - // Skip after setting the correct linkage for an external reference. - GV->setLinkage(GlobalValue::ExternalLinkage); - continue; - } - if (I->hasInitializer()) - GV->setInitializer(MapValue(I->getInitializer(), VMap)); - - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - I->getAllMetadata(MDs); - for (auto MD : MDs) - GV->addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); - - copyComdat(GV, &*I); - } - - // Similarly, copy over function bodies now... - // - for (const Function &I : M) { - if (I.isDeclaration()) - continue; - - Function *F = cast<Function>(VMap[&I]); - if (!ShouldCloneDefinition(&I)) { - // Skip after setting the correct linkage for an external reference. - F->setLinkage(GlobalValue::ExternalLinkage); - // Personality function is not valid on a declaration. - F->setPersonalityFn(nullptr); - continue; - } - - Function::arg_iterator DestI = F->arg_begin(); - for (Function::const_arg_iterator J = I.arg_begin(); J != I.arg_end(); - ++J) { - DestI->setName(J->getName()); - VMap[&*J] = &*DestI++; - } - - SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns); - - if (I.hasPersonalityFn()) - F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); - - copyComdat(F, &I); - } - - // And aliases - for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - // We already dealt with undefined aliases above. - if (!ShouldCloneDefinition(&*I)) - continue; - GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]); - if (const Constant *C = I->getAliasee()) - GA->setAliasee(MapValue(C, VMap)); - } - - // And named metadata.... 
- for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); - I != E; ++I) { - const NamedMDNode &NMD = *I; - NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); - for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); - } - - return New; -} - -extern "C" { - -LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(*unwrap(M)).release()); -} - -} diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp deleted file mode 100644 index fa6d3f8ae873..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ /dev/null @@ -1,1567 +0,0 @@ -//===- CodeExtractor.cpp - Pull code region into a new function -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the interface to tear out a code region, such as an -// individual loop or a parallel section, into a new function, replacing it with -// a call to the new function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include <cassert> -#include <cstdint> -#include <iterator> -#include <map> -#include <set> -#include <utility> -#include <vector> - -using namespace llvm; -using namespace llvm::PatternMatch; -using ProfileCount = Function::ProfileCount; - -#define DEBUG_TYPE "code-extractor" - -// Provide a command-line option to aggregate function arguments into a struct -// for functions produced by the code extractor. 
This is useful when converting
-// extracted functions to pthread-based code, as only one argument (void*) can
-// be passed in to pthread_create().
-static cl::opt<bool>
-AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
-                 cl::desc("Aggregate arguments to code-extracted functions"));
-
-/// Test whether a block is valid for extraction.
-static bool isBlockValidForExtraction(const BasicBlock &BB,
-                                      const SetVector<BasicBlock *> &Result,
-                                      bool AllowVarArgs, bool AllowAlloca) {
-  // Taking the address of a basic block moved to another function is illegal.
-  if (BB.hasAddressTaken())
-    return false;
-
-  // Don't hoist code that uses another basic block's address, as it's likely
-  // to lead to unexpected behavior, like cross-function jumps.
-  SmallPtrSet<User const *, 16> Visited;
-  SmallVector<User const *, 16> ToVisit;
-
-  for (Instruction const &Inst : BB)
-    ToVisit.push_back(&Inst);
-
-  while (!ToVisit.empty()) {
-    User const *Curr = ToVisit.pop_back_val();
-    if (!Visited.insert(Curr).second)
-      continue;
-    if (isa<BlockAddress const>(Curr))
-      return false; // Even a reference to self is likely to be incompatible.
-
-    if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
-      continue;
-
-    for (auto const &U : Curr->operands()) {
-      if (auto *UU = dyn_cast<User>(U))
-        ToVisit.push_back(UU);
-    }
-  }
-
-  // If explicitly requested, allow vastart and alloca. For invoke instructions
-  // verify that extraction is valid.
-  for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
-    if (isa<AllocaInst>(I)) {
-      if (!AllowAlloca)
-        return false;
-      continue;
-    }
-
-    if (const auto *II = dyn_cast<InvokeInst>(I)) {
-      // The unwind destination (either a landingpad, catchswitch, or
-      // cleanuppad) must be a part of the subgraph which is being extracted.
-      if (auto *UBB = II->getUnwindDest())
-        if (!Result.count(UBB))
-          return false;
-      continue;
-    }
-
-    // All catch handlers of a catchswitch instruction as well as the unwind
-    // destination must be in the subgraph.
-    if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
-      if (auto *UBB = CSI->getUnwindDest())
-        if (!Result.count(UBB))
-          return false;
-      for (auto *HBB : CSI->handlers())
-        if (!Result.count(const_cast<BasicBlock*>(HBB)))
-          return false;
-      continue;
-    }
-
-    // Make sure that the entire catch handler is within the subgraph. It is
-    // sufficient to check that the catch return's block is in the list.
-    if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
-      for (const auto *U : CPI->users())
-        if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
-          if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
-            return false;
-      continue;
-    }
-
-    // Do similar checks for a cleanup handler: the entire handler must be in
-    // the subgraph which is going to be extracted. For a cleanup return, we
-    // additionally check that the unwind destination is also in the subgraph.
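// [Illustrative aside, not part of the deleted file] Concretely, for EH IR
// such as
//
//   cleanup:
//     %cp = cleanuppad within none []
//     ...
//     cleanupret from %cp unwind label %outer
//
// extracting the cleanuppad's block is only legal if the cleanupret's block
// (and, as checked below, its unwind destination %outer) is extracted too.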
- if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) { - for (const auto *U : CPI->users()) - if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) - if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) - return false; - continue; - } - if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) { - if (auto *UBB = CRI->getUnwindDest()) - if (!Result.count(UBB)) - return false; - continue; - } - - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (const Function *F = CI->getCalledFunction()) { - auto IID = F->getIntrinsicID(); - if (IID == Intrinsic::vastart) { - if (AllowVarArgs) - continue; - else - return false; - } - - // Currently, we miscompile outlined copies of eh_typid_for. There are - // proposals for fixing this in llvm.org/PR39545. - if (IID == Intrinsic::eh_typeid_for) - return false; - } - } - } - - return true; -} - -/// Build a set of blocks to extract if the input blocks are viable. -static SetVector<BasicBlock *> -buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, - bool AllowVarArgs, bool AllowAlloca) { - assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); - SetVector<BasicBlock *> Result; - - // Loop over the blocks, adding them to our set-vector, and aborting with an - // empty set if we encounter invalid blocks. - for (BasicBlock *BB : BBs) { - // If this block is dead, don't process it. - if (DT && !DT->isReachableFromEntry(BB)) - continue; - - if (!Result.insert(BB)) - llvm_unreachable("Repeated basic blocks in extraction input"); - } - - LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName() - << '\n'); - - for (auto *BB : Result) { - if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) - return {}; - - // Make sure that the first block is not a landing pad. - if (BB == Result.front()) { - if (BB->isEHPad()) { - LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n"); - return {}; - } - continue; - } - - // All blocks other than the first must not have predecessors outside of - // the subgraph which is being extracted. - for (auto *PBB : predecessors(BB)) - if (!Result.count(PBB)) { - LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " - "outside the region except for the first block!\n" - << "Problematic source BB: " << BB->getName() << "\n" - << "Problematic destination BB: " << PBB->getName() - << "\n"); - return {}; - } - } - - return Result; -} - -CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, AssumptionCache *AC, - bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) - : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), - Suffix(Suffix) {} - -CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, AssumptionCache *AC, - std::string Suffix) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), - Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, - /* AllowVarArgs */ false, - /* AllowAlloca */ false)), - Suffix(Suffix) {} - -/// definedInRegion - Return true if the specified value is defined in the -/// extracted region. 
-static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
-  if (Instruction *I = dyn_cast<Instruction>(V))
-    if (Blocks.count(I->getParent()))
-      return true;
-  return false;
-}
-
-/// definedInCaller - Return true if the specified value is defined in the
-/// function being code extracted, but not in the region being extracted.
-/// These values must be passed in as live-ins to the function.
-static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
-  if (isa<Argument>(V)) return true;
-  if (Instruction *I = dyn_cast<Instruction>(V))
-    if (!Blocks.count(I->getParent()))
-      return true;
-  return false;
-}
-
-static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
-  BasicBlock *CommonExitBlock = nullptr;
-  auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
-    for (auto *Succ : successors(Block)) {
-      // Internal edges, ok.
-      if (Blocks.count(Succ))
-        continue;
-      if (!CommonExitBlock) {
-        CommonExitBlock = Succ;
-        continue;
-      }
-      if (CommonExitBlock == Succ)
-        continue;
-
-      return true;
-    }
-    return false;
-  };
-
-  if (any_of(Blocks, hasNonCommonExitSucc))
-    return nullptr;
-
-  return CommonExitBlock;
-}
-
-bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
-    Instruction *Addr) const {
-  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
-  Function *Func = (*Blocks.begin())->getParent();
-  for (BasicBlock &BB : *Func) {
-    if (Blocks.count(&BB))
-      continue;
-    for (Instruction &II : BB) {
-      if (isa<DbgInfoIntrinsic>(II))
-        continue;
-
-      unsigned Opcode = II.getOpcode();
-      Value *MemAddr = nullptr;
-      switch (Opcode) {
-      case Instruction::Store:
-      case Instruction::Load: {
-        if (Opcode == Instruction::Store) {
-          StoreInst *SI = cast<StoreInst>(&II);
-          MemAddr = SI->getPointerOperand();
-        } else {
-          LoadInst *LI = cast<LoadInst>(&II);
-          MemAddr = LI->getPointerOperand();
-        }
-        // A global variable cannot be aliased with locals.
-        if (dyn_cast<Constant>(MemAddr))
-          break;
-        Value *Base = MemAddr->stripInBoundsConstantOffsets();
-        if (!isa<AllocaInst>(Base) || Base == AI)
-          return false;
-        break;
-      }
-      default: {
-        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
-        if (IntrInst) {
-          if (IntrInst->isLifetimeStartOrEnd())
-            break;
-          return false;
-        }
-        // Treat all the other cases conservatively if the instruction has
-        // side effects.
-        if (II.mayHaveSideEffects())
-          return false;
-      }
-      }
-    }
-  }
-
-  return true;
-}
-
-BasicBlock *
-CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
-  BasicBlock *SinglePredFromOutlineRegion = nullptr;
-  assert(!Blocks.count(CommonExitBlock) &&
-         "Expect a block outside the region!");
-  for (auto *Pred : predecessors(CommonExitBlock)) {
-    if (!Blocks.count(Pred))
-      continue;
-    if (!SinglePredFromOutlineRegion) {
-      SinglePredFromOutlineRegion = Pred;
-    } else if (SinglePredFromOutlineRegion != Pred) {
-      SinglePredFromOutlineRegion = nullptr;
-      break;
-    }
-  }
-
-  if (SinglePredFromOutlineRegion)
-    return SinglePredFromOutlineRegion;
-
-#ifndef NDEBUG
-  auto getFirstPHI = [](BasicBlock *BB) {
-    BasicBlock::iterator I = BB->begin();
-    PHINode *FirstPhi = nullptr;
-    while (I != BB->end()) {
-      PHINode *Phi = dyn_cast<PHINode>(I);
-      if (!Phi)
-        break;
-      if (!FirstPhi) {
-        FirstPhi = Phi;
-        break;
-      }
-    }
-    return FirstPhi;
-  };
-  // If there are any phi nodes, the single pred either exists or has already
-  // been created before code extraction.
-  assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
-#endif
-
-  BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
-      CommonExitBlock->getFirstNonPHI()->getIterator());
-
-  for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
-       PI != PE;) {
-    BasicBlock *Pred = *PI++;
-    if (Blocks.count(Pred))
-      continue;
-    Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
-  }
-  // Now add the old exit block to the outline region.
-  Blocks.insert(CommonExitBlock);
-  return CommonExitBlock;
-}
-
-// Find the pair of lifetime markers for address 'Addr' that are either
-// defined inside the outline region or can legally be shrinkwrapped into the
-// outline region. If there are no other untracked uses of the address, return
-// the pair of markers if found; otherwise return a pair of nullptrs.
-CodeExtractor::LifetimeMarkerInfo
-CodeExtractor::getLifetimeMarkers(Instruction *Addr,
-                                  BasicBlock *ExitBlock) const {
-  LifetimeMarkerInfo Info;
-
-  for (User *U : Addr->users()) {
-    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
-    if (IntrInst) {
-      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
-        // Do not handle the case where Addr has multiple start markers.
-        if (Info.LifeStart)
-          return {};
-        Info.LifeStart = IntrInst;
-      }
-      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
-        if (Info.LifeEnd)
-          return {};
-        Info.LifeEnd = IntrInst;
-      }
-      continue;
-    }
-    // Any other use of the address outside the region is untracked; bail.
-    if (!definedInRegion(Blocks, U))
-      return {};
-  }
-
-  if (!Info.LifeStart || !Info.LifeEnd)
-    return {};
-
-  Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
-  Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
-  // Do legality check.
-  if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
-      !isLegalToShrinkwrapLifetimeMarkers(Addr))
-    return {};
-
-  // Check to see if we have a place to do hoisting; if not, bail.
-  if (Info.HoistLifeEnd && !ExitBlock)
-    return {};
-
-  return Info;
-}
-
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
-                                BasicBlock *&ExitBlock) const {
-  Function *Func = (*Blocks.begin())->getParent();
-  ExitBlock = getCommonExitBlock(Blocks);
-
-  auto moveOrIgnoreLifetimeMarkers =
-      [&](const LifetimeMarkerInfo &LMI) -> bool {
-    if (!LMI.LifeStart)
-      return false;
-    if (LMI.SinkLifeStart) {
-      LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
-                        << "\n");
-      SinkCands.insert(LMI.LifeStart);
-    }
-    if (LMI.HoistLifeEnd) {
-      LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
-      HoistCands.insert(LMI.LifeEnd);
-    }
-    return true;
-  };
-
-  for (BasicBlock &BB : *Func) {
-    if (Blocks.count(&BB))
-      continue;
-    for (Instruction &II : BB) {
-      auto *AI = dyn_cast<AllocaInst>(&II);
-      if (!AI)
-        continue;
-
-      LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
-      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
-      if (Moved) {
-        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
-        SinkCands.insert(AI);
-        continue;
-      }
-
-      // Follow any bitcasts.
-      SmallVector<Instruction *, 2> Bitcasts;
-      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
-      for (User *U : AI->users()) {
-        if (U->stripInBoundsConstantOffsets() == AI) {
-          Instruction *Bitcast = cast<Instruction>(U);
-          LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
-          if (LMI.LifeStart) {
-            Bitcasts.push_back(Bitcast);
-            BitcastLifetimeInfo.push_back(LMI);
-            continue;
-          }
-        }
-
-        // Found an unknown use of AI.
- if (!definedInRegion(Blocks, U)) { - Bitcasts.clear(); - break; - } - } - - // Either no bitcasts reference the alloca or there are unknown uses. - if (Bitcasts.empty()) - continue; - - LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); - SinkCands.insert(AI); - for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { - Instruction *BitcastAddr = Bitcasts[I]; - const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; - assert(LMI.LifeStart && - "Unsafe to sink bitcast without lifetime markers"); - moveOrIgnoreLifetimeMarkers(LMI); - if (!definedInRegion(Blocks, BitcastAddr)) { - LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr - << "\n"); - SinkCands.insert(BitcastAddr); - } - } - } - } -} - -void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &SinkCands) const { - for (BasicBlock *BB : Blocks) { - // If a used value is defined outside the region, it's an input. If an - // instruction is used outside the region, it's an output. - for (Instruction &II : *BB) { - for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) { - Value *V = *OI; - if (!SinkCands.count(V) && definedInCaller(Blocks, V)) - Inputs.insert(V); - } - - for (User *U : II.users()) - if (!definedInRegion(Blocks, U)) { - Outputs.insert(&II); - break; - } - } - } -} - -/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside -/// of the region, we need to split the entry block of the region so that the -/// PHI node is easier to deal with. -void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { - unsigned NumPredsFromRegion = 0; - unsigned NumPredsOutsideRegion = 0; - - if (Header != &Header->getParent()->getEntryBlock()) { - PHINode *PN = dyn_cast<PHINode>(Header->begin()); - if (!PN) return; // No PHI nodes. - - // If the header node contains any PHI nodes, check to see if there is more - // than one entry from outside the region. If so, we need to sever the - // header block into two. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (Blocks.count(PN->getIncomingBlock(i))) - ++NumPredsFromRegion; - else - ++NumPredsOutsideRegion; - - // If there is one (or fewer) predecessor from outside the region, we don't - // need to do anything special. - if (NumPredsOutsideRegion <= 1) return; - } - - // Otherwise, we need to split the header block into two pieces: one - // containing PHI nodes merging values from outside of the region, and a - // second that contains all of the code for the block and merges back any - // incoming values from inside of the region. - BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT); - - // We only want to code extract the second block now, and it becomes the new - // header of the region. - BasicBlock *OldPred = Header; - Blocks.remove(OldPred); - Blocks.insert(NewBB); - Header = NewBB; - - // Okay, now we need to adjust the PHI nodes and any branches from within the - // region to go to the new header block instead of the old header block. - if (NumPredsFromRegion) { - PHINode *PN = cast<PHINode>(OldPred->begin()); - // Loop over all of the predecessors of OldPred that are in the region, - // changing them to branch to NewBB instead. 
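// [Illustrative aside, not part of the deleted file] Conceptually, a header
// PHI such as
//
//   %v = phi i32 [ %a, %out1 ], [ %b, %out2 ], [ %c, %in ]
//
// is being severed: OldPred keeps the entries from outside the region, NewBB
// (the new region header) gets a fresh PHI merging OldPred's result with %c,
// and the loop below retargets the in-region branches from OldPred to NewBB
// before the PHIs themselves are split.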
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (Blocks.count(PN->getIncomingBlock(i))) {
-        Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
-        TI->replaceUsesOfWith(OldPred, NewBB);
-      }
-
-    // Okay, everything within the region is now branching to the right block;
-    // we just have to update the PHI nodes now, inserting PHI nodes into
-    // NewBB.
-    BasicBlock::iterator AfterPHIs;
-    for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
-      PHINode *PN = cast<PHINode>(AfterPHIs);
-      // Create a new PHI node in the new region, which has an incoming value
-      // from OldPred of PN.
-      PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
-                                       PN->getName() + ".ce", &NewBB->front());
-      PN->replaceAllUsesWith(NewPN);
-      NewPN->addIncoming(PN, OldPred);
-
-      // Loop over all of the incoming values in PN, moving them to NewPN if
-      // they are from the extracted region.
-      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
-        if (Blocks.count(PN->getIncomingBlock(i))) {
-          NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
-          PN->removeIncomingValue(i);
-          --i;
-        }
-      }
-    }
-  }
-}
-
-/// severSplitPHINodesOfExits - If PHI nodes in the exit blocks have inputs
-/// from the outlined region, split each such PHI in two: one with the inputs
-/// from the region and one with the remaining incoming blocks; the former
-/// PHIs are then placed inside the outlined region.
-void CodeExtractor::severSplitPHINodesOfExits(
-    const SmallPtrSetImpl<BasicBlock *> &Exits) {
-  for (BasicBlock *ExitBB : Exits) {
-    BasicBlock *NewBB = nullptr;
-
-    for (PHINode &PN : ExitBB->phis()) {
-      // Find all incoming values from the outlining region.
-      SmallVector<unsigned, 2> IncomingVals;
-      for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
-        if (Blocks.count(PN.getIncomingBlock(i)))
-          IncomingVals.push_back(i);
-
-      // Do not process a PHI with one (or fewer) incoming values from the
-      // region. If a PHI has exactly one such incoming value, only that
-      // incoming block will be replaced by the codeRepl block, so it is safe
-      // to skip the PHI.
-      if (IncomingVals.size() <= 1)
-        continue;
-
-      // Create a block for the new PHIs and add it to the list of outlined
-      // blocks, if that hasn't been done already.
-      if (!NewBB) {
-        NewBB = BasicBlock::Create(ExitBB->getContext(),
-                                   ExitBB->getName() + ".split",
-                                   ExitBB->getParent(), ExitBB);
-        SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
-                                           pred_end(ExitBB));
-        for (BasicBlock *PredBB : Preds)
-          if (Blocks.count(PredBB))
-            PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
-        BranchInst::Create(ExitBB, NewBB);
-        Blocks.insert(NewBB);
-      }
-
-      // Split this PHI.
-      PHINode *NewPN =
-          PHINode::Create(PN.getType(), IncomingVals.size(),
-                          PN.getName() + ".ce", NewBB->getFirstNonPHI());
-      for (unsigned i : IncomingVals)
-        NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
-      for (unsigned i : reverse(IncomingVals))
-        PN.removeIncomingValue(i, false);
-      PN.addIncoming(NewPN, NewBB);
-    }
-  }
-}
-
-void CodeExtractor::splitReturnBlocks() {
-  for (BasicBlock *Block : Blocks)
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
-      BasicBlock *New =
-          Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
-      if (DT) {
-        // Old dominates New. New node dominates all other nodes dominated
-        // by Old.
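// [Illustrative aside, not part of the deleted file] This is the standard
// dominator-tree update for splitting a block B into B -> New: New's
// immediate dominator is B, and every former dominator-tree child of B is
// re-parented under New. (For a block ending in a return the child list is
// usually empty, so the loop below is often a no-op.)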
- DomTreeNode *OldNode = DT->getNode(Block); - SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), - OldNode->end()); - - DomTreeNode *NewNode = DT->addNewBlock(New, Block); - - for (DomTreeNode *I : Children) - DT->changeImmediateDominator(I, NewNode); - } - } -} - -/// constructFunction - make a function based on inputs and outputs, as follows: -/// f(in0, ..., inN, out0, ..., outN) -Function *CodeExtractor::constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *newRootNode, - BasicBlock *newHeader, - Function *oldFunction, - Module *M) { - LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); - LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); - - // This function returns unsigned, outputs will go back by reference. - switch (NumExitBlocks) { - case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; - } - - std::vector<Type *> paramTy; - - // Add the types of the input values to the function's argument list - for (Value *value : inputs) { - LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); - } - - // Add the types of the output values to the function's argument list. - for (Value *output : outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - if (AggregateArgs) - paramTy.push_back(output->getType()); - else - paramTy.push_back(PointerType::getUnqual(output->getType())); - } - - LLVM_DEBUG({ - dbgs() << "Function type: " << *RetTy << " f("; - for (Type *i : paramTy) - dbgs() << *i << ", "; - dbgs() << ")\n"; - }); - - StructType *StructTy; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - StructTy = StructType::get(M->getContext(), paramTy); - paramTy.clear(); - paramTy.push_back(PointerType::getUnqual(StructTy)); - } - FunctionType *funcType = - FunctionType::get(RetTy, paramTy, - AllowVarArgs && oldFunction->isVarArg()); - - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; - // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); - // If the old function is no-throw, so is the new one. - if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(); - - // Inherit the uwtable attribute if we need to. - if (oldFunction->hasUWTable()) - newFunction->setHasUWTable(); - - // Inherit all of the target dependent attributes and white-listed - // target independent attributes. - // (e.g. If the extracted region contains a call to an x86.sse - // instruction we need to make sure that the extracted region has the - // "target-features" attribute allowing it to be lowered. - // FIXME: This should be changed to check to see if a specific - // attribute can not be inherited. - for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) { - if (Attr.isStringAttribute()) { - if (Attr.getKindAsString() == "thunk") - continue; - } else - switch (Attr.getKindAsEnum()) { - // Those attributes cannot be propagated safely. Explicitly list them - // here so we get a warning if new attributes are added. This list also - // includes non-function attributes. 
- case Attribute::Alignment: - case Attribute::AllocSize: - case Attribute::ArgMemOnly: - case Attribute::Builtin: - case Attribute::ByVal: - case Attribute::Convergent: - case Attribute::Dereferenceable: - case Attribute::DereferenceableOrNull: - case Attribute::InAlloca: - case Attribute::InReg: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: - case Attribute::JumpTable: - case Attribute::Naked: - case Attribute::Nest: - case Attribute::NoAlias: - case Attribute::NoBuiltin: - case Attribute::NoCapture: - case Attribute::NoReturn: - case Attribute::NoSync: - case Attribute::None: - case Attribute::NonNull: - case Attribute::ReadNone: - case Attribute::ReadOnly: - case Attribute::Returned: - case Attribute::ReturnsTwice: - case Attribute::SExt: - case Attribute::Speculatable: - case Attribute::StackAlignment: - case Attribute::StructRet: - case Attribute::SwiftError: - case Attribute::SwiftSelf: - case Attribute::WillReturn: - case Attribute::WriteOnly: - case Attribute::ZExt: - case Attribute::ImmArg: - case Attribute::EndAttrKinds: - continue; - // Those attributes should be safe to propagate to the extracted function. - case Attribute::AlwaysInline: - case Attribute::Cold: - case Attribute::NoRecurse: - case Attribute::InlineHint: - case Attribute::MinSize: - case Attribute::NoDuplicate: - case Attribute::NoFree: - case Attribute::NoImplicitFloat: - case Attribute::NoInline: - case Attribute::NonLazyBind: - case Attribute::NoRedZone: - case Attribute::NoUnwind: - case Attribute::OptForFuzzing: - case Attribute::OptimizeNone: - case Attribute::OptimizeForSize: - case Attribute::SafeStack: - case Attribute::ShadowCallStack: - case Attribute::SanitizeAddress: - case Attribute::SanitizeMemory: - case Attribute::SanitizeThread: - case Attribute::SanitizeHWAddress: - case Attribute::SanitizeMemTag: - case Attribute::SpeculativeLoadHardening: - case Attribute::StackProtect: - case Attribute::StackProtectReq: - case Attribute::StackProtectStrong: - case Attribute::StrictFP: - case Attribute::UWTable: - case Attribute::NoCfCheck: - break; - } - - newFunction->addFnAttr(Attr); - } - newFunction->getBasicBlockList().push_back(newRootNode); - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast<Instruction>(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - - // Set names for input and output arguments. 
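// [Illustrative aside, not part of the deleted file] For a region with inputs
// (i32 %n, i32* %p) and a single i32 output, the flat form built above is
//
//   define internal void @foo.extracted(i32 %n, i32* %p, i32* %result.out)
//
// whereas with AggregateArgs everything travels through one struct pointer:
//
//   define internal void @foo.extracted({ i32, i32*, i32 }* %arg)
//
// (the names here are hypothetical; the suffix logic above picks the real
// function name).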
-  if (!AggregateArgs) {
-    AI = newFunction->arg_begin();
-    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
-      AI->setName(inputs[i]->getName());
-    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
-      AI->setName(outputs[i]->getName()+".out");
-  }
-
-  // Rewrite branches to basic blocks outside of the region to new dummy
-  // blocks within the new function. This must be done before we lose track of
-  // which blocks were originally in the code region.
-  std::vector<User *> Users(header->user_begin(), header->user_end());
-  for (unsigned i = 0, e = Users.size(); i != e; ++i)
-    // The BasicBlock which contains the branch is not in the region; modify
-    // the branch target to point to a new block.
-    if (Instruction *I = dyn_cast<Instruction>(Users[i]))
-      if (I->isTerminator() && !Blocks.count(I->getParent()) &&
-          I->getParent()->getParent() == oldFunction)
-        I->replaceUsesOfWith(header, newHeader);
-
-  return newFunction;
-}
-
-/// Erase lifetime.start markers which reference inputs to the extraction
-/// region, and insert the referenced memory into \p LifetimesStart.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
-                                         const SetVector<Value *> &SunkAllocas,
-                                         SetVector<Value *> &LifetimesStart) {
-  for (BasicBlock *BB : Blocks) {
-    for (auto It = BB->begin(), End = BB->end(); It != End;) {
-      auto *II = dyn_cast<IntrinsicInst>(&*It);
-      ++It;
-      if (!II || !II->isLifetimeStartOrEnd())
-        continue;
-
-      // Get the memory operand of the lifetime marker. If the underlying
-      // object is a sunk alloca, or is otherwise defined in the extraction
-      // region, the lifetime marker must not be erased.
-      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
-      if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
-        continue;
-
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
-        LifetimesStart.insert(Mem);
-      II->eraseFromParent();
-    }
-  }
-}
-
-/// Insert lifetime start/end markers surrounding the call to the new function
-/// for objects defined in the caller.
-static void insertLifetimeMarkersSurroundingCall(
-    Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
-    CallInst *TheCall) {
-  LLVMContext &Ctx = M->getContext();
-  auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
-  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
-  Instruction *Term = TheCall->getParent()->getTerminator();
-
-  // The memory argument to a lifetime marker must be an i8*. Cache any
-  // bitcasts needed to satisfy this requirement so they may be reused.
-  DenseMap<Value *, Value *> Bitcasts;
-
-  // Emit lifetime markers for the pointers given in \p Objects. Insert the
-  // markers before the call if \p InsertBefore, and after the call otherwise.
-  auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
-                           bool InsertBefore) {
-    for (Value *Mem : Objects) {
-      assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
-                                            TheCall->getFunction()) &&
-             "Input memory not defined in original function");
-      Value *&MemAsI8Ptr = Bitcasts[Mem];
-      if (!MemAsI8Ptr) {
-        if (Mem->getType() == Int8PtrTy)
-          MemAsI8Ptr = Mem;
-        else
-          MemAsI8Ptr =
-              CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
-      }
-
-      auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
-      if (InsertBefore)
-        Marker->insertBefore(TheCall);
-      else
-        Marker->insertBefore(Term);
-    }
-  };
-
-  if (!LifetimesStart.empty()) {
-    auto StartFn = llvm::Intrinsic::getDeclaration(
-        M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
-    insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
-  }
-
-  if (!LifetimesEnd.empty()) {
-    auto EndFn = llvm::Intrinsic::getDeclaration(
-        M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
-    insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
-  }
-}
-
-/// emitCallAndSwitchStatement - This method sets up the caller side by adding
-/// the call instruction, splitting any PHI nodes in the header block as
-/// necessary.
-CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
-                                                    BasicBlock *codeReplacer,
-                                                    ValueSet &inputs,
-                                                    ValueSet &outputs) {
-  // Emit a call to the new function, passing in: a pointer to the struct (if
-  // aggregating parameters), or plain inputs and allocated memory for the
-  // outputs.
-  std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
-
-  Module *M = newFunction->getParent();
-  LLVMContext &Context = M->getContext();
-  const DataLayout &DL = M->getDataLayout();
-  CallInst *call = nullptr;
-
-  // Add inputs as params, or to be filled into the struct.
-  unsigned ArgNo = 0;
-  SmallVector<unsigned, 1> SwiftErrorArgs;
-  for (Value *input : inputs) {
-    if (AggregateArgs)
-      StructValues.push_back(input);
-    else {
-      params.push_back(input);
-      if (input->isSwiftError())
-        SwiftErrorArgs.push_back(ArgNo);
-    }
-    ++ArgNo;
-  }
-
-  // Create allocas for the outputs.
-  for (Value *output : outputs) {
-    if (AggregateArgs) {
-      StructValues.push_back(output);
-    } else {
-      AllocaInst *alloca =
-          new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
-                         nullptr, output->getName() + ".loc",
-                         &codeReplacer->getParent()->front().front());
-      ReloadOutputs.push_back(alloca);
-      params.push_back(alloca);
-    }
-  }
-
-  StructType *StructArgTy = nullptr;
-  AllocaInst *Struct = nullptr;
-  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    std::vector<Type *> ArgTypes;
-    for (ValueSet::iterator v = StructValues.begin(),
-                            ve = StructValues.end(); v != ve; ++v)
-      ArgTypes.push_back((*v)->getType());
-
-    // Allocate a struct at the beginning of this function.
-    StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
-    Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
-                            "structArg",
-                            &codeReplacer->getParent()->front().front());
-    params.push_back(Struct);
-
-    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
-      Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
-      codeReplacer->getInstList().push_back(GEP);
-      StoreInst *SI = new StoreInst(StructValues[i], GEP);
-      codeReplacer->getInstList().push_back(SI);
-    }
-  }
-
-  // Emit
the call to the function - call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); - - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - } - - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload"); - Reloads.push_back(load); - codeReplacer->getInstList().push_back(load); - std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast<Instruction>(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } - } - - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map<BasicBlock *, BasicBlock *> ExitBlockMap; - - unsigned switchVal = 0; - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (!Blocks.count(TI->getSuccessor(i))) { - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (!NewTarget) { - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - unsigned SuccNum = switchVal++; - - Value *brVal = nullptr; - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. 
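// [Illustrative aside, not part of the deleted file] With two exit blocks the
// outlined function returns i1: the first exit stub created returns true
// (!SuccNum with SuccNum == 0) and the second returns false; the caller later
// folds its switch into a conditional branch on the call's result.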
-          case 2:  // Conditional branch, return a bool.
-            brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
-            break;
-          default:
-            brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
-            break;
-          }
-
-          ReturnInst::Create(Context, brVal, NewTarget);
-
-          // Update the switch instruction.
-          TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
-                                              SuccNum),
-                             OldTarget);
-        }
-
-        // Rewrite the original branch instruction with this new target.
-        TI->setSuccessor(i, NewTarget);
-      }
-  }
-
-  // Store the arguments right after the definition of the output value. This
-  // must be done after the exit stubs are created, to ensure that the store
-  // of an invoke's result is placed inside the outlined function.
-  Function::arg_iterator OAI = OutputArgBegin;
-  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
-    auto *OutI = dyn_cast<Instruction>(outputs[i]);
-    if (!OutI)
-      continue;
-
-    // Find the proper insertion point.
-    BasicBlock::iterator InsertPt;
-    // In case OutI is an invoke, we insert the store at the beginning of the
-    // 'normal destination' BB. Otherwise we insert the store right after OutI.
-    if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
-      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
-    else if (auto *Phi = dyn_cast<PHINode>(OutI))
-      InsertPt = Phi->getParent()->getFirstInsertionPt();
-    else
-      InsertPt = std::next(OutI->getIterator());
-
-    Instruction *InsertBefore = &*InsertPt;
-    assert((InsertBefore->getFunction() == newFunction ||
-            Blocks.count(InsertBefore->getParent())) &&
-           "InsertPt should be in new function");
-    assert(OAI != newFunction->arg_end() &&
-           "Number of output arguments should match "
-           "the amount of defined values");
-    if (AggregateArgs) {
-      Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
-          InsertBefore);
-      new StoreInst(outputs[i], GEP, InsertBefore);
-      // Since there should be only one struct argument aggregating
-      // all the output values, we shouldn't increment OAI, which always
-      // points to the struct argument, in this case.
-    } else {
-      new StoreInst(outputs[i], &*OAI, InsertBefore);
-      ++OAI;
-    }
-  }
-
-  // Now that we've done the deed, simplify the switch instruction.
-  Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
-  switch (NumExitBlocks) {
-  case 0:
-    // There are no successors (the block containing the switch itself), which
-    // means that this was previously the last part of the function; hence it
-    // should be rewritten as a `ret'.
-
-    // Check if the function should return a value.
-    if (OldFnRetTy->isVoidTy()) {
-      ReturnInst::Create(Context, nullptr, TheSwitch);  // Return void.
-    } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
-      // Return what we have.
-      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
-    } else {
-      // Otherwise we must have code-extracted an unwind or something; just
-      // return whatever we want.
-      ReturnInst::Create(Context,
-                         Constant::getNullValue(OldFnRetTy), TheSwitch);
-    }
-
-    TheSwitch->eraseFromParent();
-    break;
-  case 1:
-    // Only a single destination; change the switch into an unconditional
-    // branch.
-    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
-    TheSwitch->eraseFromParent();
-    break;
-  case 2:
-    BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
-                       call, TheSwitch);
-    TheSwitch->eraseFromParent();
-    break;
-  default:
-    // Otherwise, make the default destination of the switch instruction be
-    // one of the other successors.
-    TheSwitch->setCondition(call);
-    TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
-    // Remove the redundant case.
-    TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
-    break;
-  }
-
-  // Insert lifetime markers around the reloads of any output values. The
-  // allocas that the output values are stored in are only in use in the
-  // codeRepl block.
-  insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
-
-  return call;
-}
-
-void CodeExtractor::moveCodeToFunction(Function *newFunction) {
-  Function *oldFunc = (*Blocks.begin())->getParent();
-  Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
-  Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
-
-  for (BasicBlock *Block : Blocks) {
-    // Delete the basic block from the old function's list of blocks.
-    oldBlocks.remove(Block);
-
-    // Insert this basic block into the new function.
-    newBlocks.push_back(Block);
-
-    // Remove @llvm.assume calls that were moved to the new function from the
-    // old function's assumption cache.
-    if (AC)
-      for (auto &I : *Block)
-        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
-          AC->unregisterAssumption(cast<CallInst>(&I));
-  }
-}
-
-void CodeExtractor::calculateNewCallTerminatorWeights(
-    BasicBlock *CodeReplacer,
-    DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
-    BranchProbabilityInfo *BPI) {
-  using Distribution = BlockFrequencyInfoImplBase::Distribution;
-  using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
-
-  // Update the branch weights for the exit block.
-  Instruction *TI = CodeReplacer->getTerminator();
-  SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
-
-  // Block frequency distribution with a dummy node.
-  Distribution BranchDist;
-
-  // Add each of the frequencies of the successors.
-  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
-    BlockNode ExitNode(i);
-    uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
-    if (ExitFreq != 0)
-      BranchDist.addExit(ExitNode, ExitFreq);
-    else
-      BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero());
-  }
-
-  // Check for no total weight.
-  if (BranchDist.Total == 0)
-    return;
-
-  // Normalize the distribution so that the weights can fit in an unsigned.
-  BranchDist.normalize();
-
-  // Create normalized branch weights and set the metadata.
-  for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
-    const auto &Weight = BranchDist.Weights[I];
-
-    // Get the weight and update the current BFI.
-    BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
-    BranchProbability BP(Weight.Amount, BranchDist.Total);
-    BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP);
-  }
-  TI->setMetadata(
-      LLVMContext::MD_prof,
-      MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
-}
-
-Function *CodeExtractor::extractCodeRegion() {
-  if (!isEligible())
-    return nullptr;
-
-  // Assumption: this is a single-entry code region, and the header is the
-  // first block in the region.
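// A minimal usage sketch (illustrative; assumes a Loop LP and its dominator
// tree DT are in scope):
//
//   CodeExtractor CE(DT, LP);
//   if (Function *Outlined = CE.extractCodeRegion())
//     Outlined->addFnAttr(Attribute::NoInline);
//
// extractCodeRegion() returns nullptr whenever the region is ineligible, so
// callers must handle failure.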
- BasicBlock *header = *Blocks.begin();
- Function *oldFunction = header->getParent();
-
- // For functions with varargs, check that varargs handling is only done in the
- // outlined function, i.e. vastart and vaend are only used in outlined blocks.
- if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
- auto containsVarArgIntrinsic = [](Instruction &I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (const Function *F = CI->getCalledFunction())
- return F->getIntrinsicID() == Intrinsic::vastart ||
- F->getIntrinsicID() == Intrinsic::vaend;
- return false;
- };
-
- for (auto &BB : *oldFunction) {
- if (Blocks.count(&BB))
- continue;
- if (llvm::any_of(BB, containsVarArgIntrinsic))
- return nullptr;
- }
- }
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
- BasicBlock *CommonExit = nullptr;
-
- // Calculate the entry frequency of the new function before we change the root
- // block.
- BlockFrequency EntryFreq;
- if (BFI) {
- assert(BPI && "Both BPI and BFI are required to preserve profile info");
- for (BasicBlock *Pred : predecessors(header)) {
- if (Blocks.count(Pred))
- continue;
- EntryFreq +=
- BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
- }
- }
-
- // If we have any return instructions in the region, split those blocks so
- // that the return is not in the region.
- splitReturnBlocks();
-
- // Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
- DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
- SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
- // Update the branch weight for this successor.
- if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
- }
- ExitBlocks.insert(*SI);
- }
- }
- }
- NumExitBlocks = ExitBlocks.size();
-
- // If we have to split PHI nodes of the entry or exit blocks, do so now.
- severSplitPHINodesOfEntry(header);
- severSplitPHINodesOfExits(ExitBlocks);
-
- // This takes the place of the original loop.
- BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
- "codeRepl", oldFunction,
- header);
-
- // The new function needs a root node because other nodes can branch to the
- // head of the region, but the entry node of a function cannot have preds.
- BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
- "newFuncRoot");
- auto *BranchI = BranchInst::Create(header);
- // If the original function has debug info, we have to add a debug location
- // to the new branch instruction from the artificial entry block.
- // We use the debug location of the first instruction in the extracted
- // blocks, as there is no other equivalent line in the source code.
- if (oldFunction->getSubprogram()) {
- any_of(Blocks, [&BranchI](const BasicBlock *BB) {
- return any_of(*BB, [&BranchI](const Instruction &I) {
- if (!I.getDebugLoc())
- return false;
- BranchI->setDebugLoc(I.getDebugLoc());
- return true;
- });
- });
- }
- newFuncRoot->getInstList().push_back(BranchI);
-
- findAllocas(SinkingCands, HoistingCands, CommonExit);
- assert(HoistingCands.empty() || CommonExit);
-
- // Find the inputs to and outputs from the code region.
- findInputsOutputs(inputs, outputs, SinkingCands);
-
- // Now sink all instructions which only have non-phi uses inside the region.
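To make the entry-frequency computation earlier in this function concrete, with illustrative numbers only: if the header has two predecessors outside the region, one with block frequency 8 and edge probability 1/2 into the header, and one with frequency 4 and edge probability 1/4, the outlined function's entry frequency is 8 * 1/2 + 4 * 1/4 = 5. Real BlockFrequency values are fixed-point quantities relative to the function entry, but the weighting is exactly this sum of frequency times branch probability.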
- // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - AllocaInst *FirstSunkAlloca = nullptr; - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast<AllocaInst>(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; - } - } - assert((SinkingCands.empty() || FirstSunkAlloca) && - "Did not expect a sink candidate without any allocas"); - for (auto *II : SinkingCands) { - if (!isa<AllocaInst>(II)) { - cast<Instruction>(II)->moveAfter(FirstSunkAlloca); - } - } - - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) - cast<Instruction>(II)->moveBefore(TI); - } - - // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start markers within it. The effects of these - // markers must be replicated in the calling function to prevent the stack - // coloring pass from merging slots which store input objects. - ValueSet LifetimesStart; - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - - // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = - constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, - oldFunction, oldFunction->getParent()); - - // Update the entry count of the function. - if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); - } - - CallInst *TheCall = - emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); - - moveCodeToFunction(newFunction); - - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); - - // Propagate personality info to the new function if there is one. - if (oldFunction->hasPersonalityFn()) - newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); - - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); - - // Loop over all of the PHI nodes in the header and exit blocks, and change - // any references to the old incoming edge to be the new incoming edge. - for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!Blocks.count(PN->getIncomingBlock(i))) - PN->setIncomingBlock(i, newFuncRoot); - } - - for (BasicBlock *ExitBB : ExitBlocks) - for (PHINode &PN : ExitBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(PN.getIncomingBlock(i))) - continue; - - // Ensure that there is only one incoming value from codeReplacer. 
- if (!IncomingCodeReplacerVal) {
- PN.setIncomingBlock(i, codeReplacer);
- IncomingCodeReplacerVal = PN.getIncomingValue(i);
- } else
- assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
- "PHI has two incompatible incoming values from codeRepl");
- }
- }
-
- // Erase debug info intrinsics. Variable updates within the new function are
- // invisible to debuggers. This could be improved by defining a DISubprogram
- // for the new function.
- for (BasicBlock &BB : *newFunction) {
- auto BlockIt = BB.begin();
- // Remove debug info intrinsics from the new function.
- while (BlockIt != BB.end()) {
- Instruction *Inst = &*BlockIt;
- ++BlockIt;
- if (isa<DbgInfoIntrinsic>(Inst))
- Inst->eraseFromParent();
- }
- // Remove debug info intrinsics which refer to values in the new function
- // from the old function.
- SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
- for (Instruction &I : BB)
- findDbgUsers(DbgUsers, &I);
- for (DbgVariableIntrinsic *DVI : DbgUsers)
- DVI->eraseFromParent();
- }
-
- // Mark the new function `noreturn` if applicable. Terminators which resume
- // exception propagation are treated as returning instructions. This is to
- // avoid inserting traps after calls to outlined functions which unwind.
- bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
- const Instruction *Term = BB.getTerminator();
- return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
- });
- if (doesNotReturn)
- newFunction->setDoesNotReturn();
-
- LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
- newFunction->dump();
- report_fatal_error("verification of newFunction failed!");
- });
- LLVM_DEBUG(if (verifyFunction(*oldFunction))
- report_fatal_error("verification of oldFunction failed!"));
- return newFunction;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
deleted file mode 100644
index 069a86f6ab33..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines functions that are used to process llvm.global_ctors.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CtorUtils.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "ctor_utils"
-
-using namespace llvm;
-
-/// Given a specified llvm.global_ctors list, remove the listed elements.
-static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
- // Filter out the initializer elements to remove.
- ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
- SmallVector<Constant *, 10> CAList;
- for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
- if (!CtorsToRemove.test(I))
- CAList.push_back(OldCA->getOperand(I));
-
- // Create the new array initializer.
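These helpers feed the public entry point, optimizeGlobalCtorsList, defined at the end of this file. A minimal illustrative use, with a hypothetical predicate (a real caller such as GlobalOpt decides removability by evaluating the ctor at compile time):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/CtorUtils.h"
    using namespace llvm;

    // Drop ctors whose body is a single `ret void`, i.e. trivially empty.
    static bool dropEmptyCtors(Module &M) {
      return optimizeGlobalCtorsList(M, [](Function *F) {
        const BasicBlock &Entry = F->getEntryBlock();
        return Entry.size() == 1 && isa<ReturnInst>(Entry.getTerminator());
      });
    }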
- ArrayType *ATy =
- ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
- Constant *CA = ConstantArray::get(ATy, CAList);
-
- // If we didn't change the number of elements, don't create a new GV.
- if (CA->getType() == OldCA->getType()) {
- GCL->setInitializer(CA);
- return;
- }
-
- // Create the new global and insert it next to the existing list.
- GlobalVariable *NGV =
- new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
- CA, "", GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
- NGV->takeName(GCL);
-
- // Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
- GCL->eraseFromParent();
-}
-
-/// Given an llvm.global_ctors list that we can understand,
-/// return the list of functions, including any null terminator, as a vector.
-static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
- if (GV->getInitializer()->isNullValue())
- return std::vector<Function *>();
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- std::vector<Function *> Result;
- Result.reserve(CA->getNumOperands());
- for (auto &V : CA->operands()) {
- ConstantStruct *CS = cast<ConstantStruct>(V);
- Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
- }
- return Result;
-}
-
-/// Find the llvm.global_ctors list, verifying that all initializers have an
-/// init priority of 65535.
-static GlobalVariable *findGlobalCtors(Module &M) {
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return nullptr;
-
- // Verify that the initializer is simple enough for us to handle. We are
- // only allowed to optimize the initializer if it is unique.
- if (!GV->hasUniqueInitializer())
- return nullptr;
-
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
- return GV;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-
- for (auto &V : CA->operands()) {
- if (isa<ConstantAggregateZero>(V))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(V);
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
-
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return nullptr;
-
- // Init priority must be standard.
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
- if (CI->getZExtValue() != 65535)
- return nullptr;
- }
-
- return GV;
-}
-
-/// Call "ShouldRemove" for every entry in M's global_ctors list and remove the
-/// entries for which it returns true. Return true if anything changed.
-bool llvm::optimizeGlobalCtorsList(
- Module &M, function_ref<bool(Function *)> ShouldRemove) {
- GlobalVariable *GlobalCtors = findGlobalCtors(M);
- if (!GlobalCtors)
- return false;
-
- std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
- if (Ctors.empty())
- return false;
-
- bool MadeChange = false;
-
- // Loop over global ctors, optimizing them when we can.
- unsigned NumCtors = Ctors.size();
- BitVector CtorsToRemove(NumCtors);
- for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
- Function *F = Ctors[i];
- // Skip a null entry; a null terminator may appear in the middle of the
- // list.
- if (!F)
- continue;
-
- LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
-
- // We cannot simplify external ctor functions.
- if (F->empty())
- continue;
-
- // If the callback says this ctor can be removed, do so.
- if (ShouldRemove(F)) {
- Ctors[i] = nullptr;
- CtorsToRemove.set(i);
- NumCtors--;
- MadeChange = true;
- continue;
- }
- }
-
- if (!MadeChange)
- return false;
-
- removeGlobalCtors(GlobalCtors, CtorsToRemove);
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
deleted file mode 100644
index 5f53d794fe8a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-/// DemoteRegToStack - This function takes a virtual register computed by an
-/// Instruction and replaces it with a slot in the stack frame, allocated via
-/// alloca. This allows the CFG to be changed around without fear of
-/// invalidating the SSA information for the value. It returns the pointer to
-/// the alloca inserted to create a stack slot for I.
-AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
- Instruction *AllocaPoint) {
- if (I.use_empty()) {
- I.eraseFromParent();
- return nullptr;
- }
-
- Function *F = I.getParent()->getParent();
- const DataLayout &DL = F->getParent()->getDataLayout();
-
- // Create a stack slot to hold the value.
- AllocaInst *Slot;
- if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName()+".reg2mem", AllocaPoint);
- } else {
- Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
- I.getName() + ".reg2mem", &F->getEntryBlock().front());
- }
-
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical. Therefore, split the critical edge and create a basic block
- // into which the store can be inserted.
- if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- if (!II->getNormalDest()->getSinglePredecessor()) {
- unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
- assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
- BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
- assert(BB && "Unable to split critical edge.");
- (void)BB;
- }
- }
-
- // Change all of the users of the instruction to read from the stack slot.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.user_back());
- if (PHINode *PN = dyn_cast<PHINode>(U)) {
- // If this is a PHI node, we can't insert a load of the value before the
- // use. Instead insert the load in the predecessor block corresponding
- // to the incoming value.
- //
- // Note that if there are multiple edges from a basic block to this PHI
- // node, we cannot insert multiple loads. The problem is that the
- // resulting PHI node would have multiple values (one from each load)
- // coming in from the same block, which is illegal SSA form. For this
- // reason, we keep track of and reuse the loads we insert.
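Before the PHI special case below, a sketch of the intended calling pattern may help. Demoting every value that escapes its defining block (the helper name is hypothetical; the iterator is advanced before the call because DemoteRegToStack erases instructions that have no uses):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Demote each value used outside its defining block so the CFG can be
    // rearranged without invalidating SSA form.
    static void demoteEscapingValues(BasicBlock &BB) {
      for (auto It = BB.begin(); It != BB.end();) {
        Instruction &I = *It++; // Advance before mutating the block.
        if (I.isUsedOutsideOfBlock(&BB))
          DemoteRegToStack(I, /*VolatileLoads=*/false);
      }
    }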
- DenseMap<BasicBlock*, Value*> Loads; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == &I) { - Value *&V = Loads[PN->getIncomingBlock(i)]; - if (!V) { - // Insert the load into the predecessor block - V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", - VolatileLoads, - PN->getIncomingBlock(i)->getTerminator()); - } - PN->setIncomingValue(i, V); - } - - } else { - // If this is a normal instruction, just insert a load. - Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", - VolatileLoads, U); - U->replaceUsesOfWith(&I, V); - } - } - - // Insert stores of the computed value into the stack slot. We have to be - // careful if I is an invoke instruction, because we can't insert the store - // AFTER the terminator instruction. - BasicBlock::iterator InsertPt; - if (!I.isTerminator()) { - InsertPt = ++I.getIterator(); - for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) - /* empty */; // Don't insert before PHI nodes or landingpad instrs. - } else { - InvokeInst &II = cast<InvokeInst>(I); - InsertPt = II.getNormalDest()->getFirstInsertionPt(); - } - - new StoreInst(&I, Slot, &*InsertPt); - return Slot; -} - -/// DemotePHIToStack - This function takes a virtual register computed by a PHI -/// node and replaces it with a slot in the stack frame allocated via alloca. -/// The PHI node is deleted. It returns the pointer to the alloca inserted. -AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { - if (P->use_empty()) { - P->eraseFromParent(); - return nullptr; - } - - const DataLayout &DL = P->getModule()->getDataLayout(); - - // Create a stack slot to hold the value. - AllocaInst *Slot; - if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, - P->getName()+".reg2mem", AllocaPoint); - } else { - Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, - P->getName() + ".reg2mem", - &F->getEntryBlock().front()); - } - - // Iterate over each operand inserting a store in each predecessor. - for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { - if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { - assert(II->getParent() != P->getIncomingBlock(i) && - "Invoke edge not supported yet"); (void)II; - } - new StoreInst(P->getIncomingValue(i), Slot, - P->getIncomingBlock(i)->getTerminator()); - } - - // Insert a load in place of the PHI and replace all uses. - BasicBlock::iterator InsertPt = P->getIterator(); - - for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) - /* empty */; // Don't insert before PHI nodes or landingpad instrs. - - Value *V = - new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt); - P->replaceAllUsesWith(V); - - // Delete PHI. - P->eraseFromParent(); - return Slot; -} diff --git a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp deleted file mode 100644 index 4aa40eeadda4..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ /dev/null @@ -1,171 +0,0 @@ -//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils.h" -using namespace llvm; - -static void insertCall(Function &CurFn, StringRef Func, - Instruction *InsertionPt, DebugLoc DL) { - Module &M = *InsertionPt->getParent()->getParent()->getParent(); - LLVMContext &C = InsertionPt->getParent()->getContext(); - - if (Func == "mcount" || - Func == ".mcount" || - Func == "\01__gnu_mcount_nc" || - Func == "\01_mcount" || - Func == "\01mcount" || - Func == "__mcount" || - Func == "_mcount" || - Func == "__cyg_profile_func_enter_bare") { - FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); - CallInst *Call = CallInst::Create(Fn, "", InsertionPt); - Call->setDebugLoc(DL); - return; - } - - if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { - Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; - - FunctionCallee Fn = M.getOrInsertFunction( - Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); - - Instruction *RetAddr = CallInst::Create( - Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), - ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "", - InsertionPt); - RetAddr->setDebugLoc(DL); - - Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), - RetAddr}; - - CallInst *Call = - CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt); - Call->setDebugLoc(DL); - return; - } - - // We only know how to call a fixed set of instrumentation functions, because - // they all expect different arguments, etc. - report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'"); -} - -static bool runOnFunction(Function &F, bool PostInlining) { - StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined" - : "instrument-function-entry"; - - StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined" - : "instrument-function-exit"; - - StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString(); - StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString(); - - bool Changed = false; - - // If the attribute is specified, insert instrumentation and then "consume" - // the attribute so that it's not inserted again if the pass should happen to - // run later for some reason. - - if (!EntryFunc.empty()) { - DebugLoc DL; - if (auto SP = F.getSubprogram()) - DL = DebugLoc::get(SP->getScopeLine(), 0, SP); - - insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); - Changed = true; - F.removeAttribute(AttributeList::FunctionIndex, EntryAttr); - } - - if (!ExitFunc.empty()) { - for (BasicBlock &BB : F) { - Instruction *T = BB.getTerminator(); - if (!isa<ReturnInst>(T)) - continue; - - // If T is preceded by a musttail call, that's the real terminator. 
- Instruction *Prev = T->getPrevNode(); - if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev)) - Prev = BCI->getPrevNode(); - if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) { - if (CI->isMustTailCall()) - T = CI; - } - - DebugLoc DL; - if (DebugLoc TerminatorDL = T->getDebugLoc()) - DL = TerminatorDL; - else if (auto SP = F.getSubprogram()) - DL = DebugLoc::get(0, 0, SP); - - insertCall(F, ExitFunc, T, DL); - Changed = true; - } - F.removeAttribute(AttributeList::FunctionIndex, ExitAttr); - } - - return Changed; -} - -namespace { -struct EntryExitInstrumenter : public FunctionPass { - static char ID; - EntryExitInstrumenter() : FunctionPass(ID) { - initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } -}; -char EntryExitInstrumenter::ID = 0; - -struct PostInlineEntryExitInstrumenter : public FunctionPass { - static char ID; - PostInlineEntryExitInstrumenter() : FunctionPass(ID) { - initializePostInlineEntryExitInstrumenterPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } -}; -char PostInlineEntryExitInstrumenter::ID = 0; -} - -INITIALIZE_PASS( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) -INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) - -FunctionPass *llvm::createEntryExitInstrumenterPass() { - return new EntryExitInstrumenter(); -} - -FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { - return new PostInlineEntryExitInstrumenter(); -} - -PreservedAnalyses -llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { - runOnFunction(F, PostInlining); - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp deleted file mode 100644 index 914babeb6829..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===- EscapeEnumerator.cpp -----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines a helper class that enumerates all possible exits from a function, -// including exception handling. 
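The usage pattern implied by Next() below: construct the enumerator over a function, then loop until it returns null, inserting exit-path code at each stopping point. A minimal sketch, assuming the constructor defaults declared in EscapeEnumerator.h; F is a Function in scope and ExitHook is a hypothetical FunctionCallee supplied by the caller:

    EscapeEnumerator EE(F); // Assumed defaults: "cleanup" name, handle EH.
    while (IRBuilder<> *Builder = EE.Next())
      Builder->CreateCall(ExitHook); // Runs on every return/resume and on
                                     // the synthesized EH cleanup path.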
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/EscapeEnumerator.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Module.h" -using namespace llvm; - -static FunctionCallee getDefaultPersonalityFn(Module *M) { - LLVMContext &C = M->getContext(); - Triple T(M->getTargetTriple()); - EHPersonality Pers = getDefaultEHPersonality(T); - return M->getOrInsertFunction(getEHPersonalityName(Pers), - FunctionType::get(Type::getInt32Ty(C), true)); -} - -IRBuilder<> *EscapeEnumerator::Next() { - if (Done) - return nullptr; - - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = &*StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - Instruction *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI); - return &Builder; - } - - Done = true; - - if (!HandleExceptions) - return nullptr; - - if (F.doesNotThrow()) - return nullptr; - - // Find all 'call' instructions that may throw. - SmallVector<Instruction *, 16> Calls; - for (BasicBlock &BB : F) - for (Instruction &II : BB) - if (CallInst *CI = dyn_cast<CallInst>(&II)) - if (!CI->doesNotThrow()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; - - // Create a cleanup block. - LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); - if (!F.hasPersonalityFn()) { - FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent()); - F.setPersonalityFn(cast<Constant>(PersFn.getCallee())); - } - - if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { - report_fatal_error("Scoped EH not supported"); - } - - LandingPadInst *LPad = - LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); - - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value *, 16> Args; - for (unsigned I = Calls.size(); I != 0;) { - CallInst *CI = cast<CallInst>(Calls[--I]); - changeToInvokeAndSplitBasicBlock(CI, CleanupBB); - } - - Builder.SetInsertPoint(RI); - return &Builder; -} diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp deleted file mode 100644 index 0e203f4e075d..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp +++ /dev/null @@ -1,731 +0,0 @@ -//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Function evaluator for LLVM IR. 
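Before the details, a sketch of how a client (GlobalOpt is the main one) drives this evaluator: evaluate a ctor with no arguments and, on success, commit the recorded stores. DL, TLI, and F are assumed in scope, the getMutatedMemory accessor is assumed from Evaluator.h, and commitValueTo is a hypothetical helper:

    Evaluator Eval(DL, TLI);
    SmallVector<Constant *, 0> Args; // Ctors take no arguments.
    Constant *RetVal = nullptr;
    if (Eval.EvaluateFunction(F, RetVal, Args)) {
      // Each (pointer, value) pair records a store the evaluated code made.
      for (const auto &KV : Eval.getMutatedMemory())
        commitValueTo(KV.second, KV.first); // Hypothetical commit step.
    }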
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/Evaluator.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <iterator> - -#define DEBUG_TYPE "evaluator" - -using namespace llvm; - -static inline bool -isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL); - -/// Return true if the specified constant can be handled by the code generator. -/// We don't want to generate something like: -/// void *X = &X/42; -/// because the code generator doesn't have a relocation that can handle that. -/// -/// This function should be called if C was not found (but just got inserted) -/// in SimpleConstants to avoid having to rescan the same constants all the -/// time. -static bool -isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL) { - // Simple global addresses are supported, do not allow dllimport or - // thread-local globals. - if (auto *GV = dyn_cast<GlobalValue>(C)) - return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); - - // Simple integer, undef, constant aggregate zero, etc are all supported. - if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) - return true; - - // Aggregate values are safe if all their elements are. - if (isa<ConstantAggregate>(C)) { - for (Value *Op : C->operands()) - if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) - return false; - return true; - } - - // We don't know exactly what relocations are allowed in constant expressions, - // so we allow &global+constantoffset, which is safe and uniformly supported - // across targets. - ConstantExpr *CE = cast<ConstantExpr>(C); - switch (CE->getOpcode()) { - case Instruction::BitCast: - // Bitcast is fine if the casted value is fine. - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - case Instruction::IntToPtr: - case Instruction::PtrToInt: - // int <=> ptr is fine if the int type is the same size as the - // pointer type. - if (DL.getTypeSizeInBits(CE->getType()) != - DL.getTypeSizeInBits(CE->getOperand(0)->getType())) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - // GEP is fine if it is simple + constant offset. - case Instruction::GetElementPtr: - for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) - if (!isa<ConstantInt>(CE->getOperand(i))) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - case Instruction::Add: - // We allow simple+cst. 
- if (!isa<ConstantInt>(CE->getOperand(1))) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - } - return false; -} - -static inline bool -isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL) { - // If we already checked this constant, we win. - if (!SimpleConstants.insert(C).second) - return true; - // Check the constant. - return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); -} - -/// Return true if this constant is simple enough for us to understand. In -/// particular, if it is a cast to anything other than from one pointer type to -/// another pointer type, we punt. We basically just support direct accesses to -/// globals and GEP's of globals. This should be kept up to date with -/// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { - // Conservatively, avoid aggregate types. This is because we don't - // want to worry about them partially overlapping other stores. - if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) - return false; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/*_odr/linkonce linkage or external globals. - return GV->hasUniqueInitializer(); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - // Handle a constantexpr gep. - if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0)) && - cast<GEPOperator>(CE)->isInBounds()) { - GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - if (!GV->hasUniqueInitializer()) - return false; - - // The first index must be zero. - ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); - if (!CI || !CI->isZero()) return false; - - // The remaining indices must be compile-time known integers within the - // notional bounds of the corresponding static array types. - if (!CE->isGEPWithNoNotionalOverIndexing()) - return false; - - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); - - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. - } else if (CE->getOpcode() == Instruction::BitCast && - isa<GlobalVariable>(CE->getOperand(0))) { - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); - } - } - - return false; -} - -/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's -/// type and walk down through the initial elements to obtain additional -/// pointers to try. Returns the first non-null return value from Func, or -/// nullptr if the type can't be introspected further. -static Constant * -evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, - const TargetLibraryInfo *TLI, - std::function<Constant *(Constant *)> Func) { - Constant *Val; - while (!(Val = Func(Ptr))) { - // If Ty is a struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. 
- Type *Ty = cast<PointerType>(Ptr->getType())->getElementType(); - if (!isa<StructType>(Ty)) - break; - - IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant *const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList); - if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) - Ptr = FoldedPtr; - } - return Val; -} - -static Constant *getInitializer(Constant *C) { - auto *GV = dyn_cast<GlobalVariable>(C); - return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; -} - -/// Return the value that would be computed by a load from P after the stores -/// reflected by 'memory' have been performed. If we can't decide, return null. -Constant *Evaluator::ComputeLoadResult(Constant *P) { - // If this memory location has been recently stored, use the stored value: it - // is the most up-to-date. - auto findMemLoc = [this](Constant *Ptr) { - DenseMap<Constant *, Constant *>::const_iterator I = - MutatedMemory.find(Ptr); - return I != MutatedMemory.end() ? I->second : nullptr; - }; - - if (Constant *Val = findMemLoc(P)) - return Val; - - // Access it. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { - if (GV->hasDefinitiveInitializer()) - return GV->getInitializer(); - return nullptr; - } - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) { - switch (CE->getOpcode()) { - // Handle a constantexpr getelementptr. - case Instruction::GetElementPtr: - if (auto *I = getInitializer(CE->getOperand(0))) - return ConstantFoldLoadThroughGEPConstantExpr(I, CE); - break; - // Handle a constantexpr bitcast. - case Instruction::BitCast: - // We're evaluating a load through a pointer that was bitcast to a - // different type. See if the "from" pointer has recently been stored. - // If it hasn't, we may still be able to find a stored pointer by - // introspecting the type. - Constant *Val = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc); - if (!Val) - Val = getInitializer(CE->getOperand(0)); - if (Val) - return ConstantFoldLoadThroughBitcast( - Val, P->getType()->getPointerElementType(), DL); - break; - } - } - - return nullptr; // don't know how to evaluate. -} - -static Function *getFunction(Constant *C) { - if (auto *Fn = dyn_cast<Function>(C)) - return Fn; - - if (auto *Alias = dyn_cast<GlobalAlias>(C)) - if (auto *Fn = dyn_cast<Function>(Alias->getAliasee())) - return Fn; - return nullptr; -} - -Function * -Evaluator::getCalleeWithFormalArgs(CallSite &CS, - SmallVector<Constant *, 8> &Formals) { - auto *V = CS.getCalledValue(); - if (auto *Fn = getFunction(getVal(V))) - return getFormalParams(CS, Fn, Formals) ? 
Fn : nullptr;
-
- auto *CE = dyn_cast<ConstantExpr>(V);
- if (!CE || CE->getOpcode() != Instruction::BitCast ||
- !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals))
- return nullptr;
-
- return dyn_cast<Function>(
- ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
-}
-
-bool Evaluator::getFormalParams(CallSite &CS, Function *F,
- SmallVector<Constant *, 8> &Formals) {
- if (!F)
- return false;
-
- auto *FTy = F->getFunctionType();
- if (FTy->getNumParams() > CS.getNumArgOperands()) {
- LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
- return false;
- }
-
- auto ArgI = CS.arg_begin();
- for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE;
- ++ParI) {
- auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL);
- if (!ArgC) {
- LLVM_DEBUG(dbgs() << "Can not convert function argument.\n");
- return false;
- }
- Formals.push_back(ArgC);
- ++ArgI;
- }
- return true;
-}
-
-/// If the call expression contains a bitcast, we may need to cast the
-/// evaluated return value to the type of the call expression.
-Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
- if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
- return RV;
-
- if (auto *FT =
- dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
- RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
- if (!RV)
- LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
- }
- return RV;
-}
-
-/// Evaluate all instructions in block BB, returning true if successful, false
-/// if we can't evaluate it. NextBB returns the next BB that control flows into,
-/// or null upon return.
-bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
- BasicBlock *&NextBB) {
- // This is the main evaluation loop.
- while (true) {
- Constant *InstResult = nullptr;
-
- LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
-
- if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) {
- LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
- return false; // no volatile/atomic accesses.
- }
- Constant *Ptr = getVal(SI->getOperand(1));
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
- LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
- Ptr = FoldedPtr;
- LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
- }
- if (!isSimpleEnoughPointerToCommit(Ptr)) {
- // If this is too complex for us to commit, reject it.
- LLVM_DEBUG(
- dbgs() << "Pointer is too complex for us to evaluate store.");
- return false;
- }
-
- Constant *Val = getVal(SI->getOperand(0));
-
- // If this might be too difficult for the backend to handle (e.g. the addr
- // of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
- LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. "
- << *Val << "\n");
- return false;
- }
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- LLVM_DEBUG(dbgs()
- << "Attempting to resolve bitcast on constant ptr.\n");
- // If we're evaluating a store through a bitcast, then we need
- // to pull the bitcast off the pointer type and push it onto the
- // stored value. In order to push the bitcast onto the stored value,
- // a bitcast from the pointer's element type to Val's type must be
- // legal.
If it's not, we can try introspecting the type to find a - // legal conversion. - - auto castValTy = [&](Constant *P) -> Constant * { - Type *Ty = cast<PointerType>(P->getType())->getElementType(); - if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) { - Ptr = P; - return FV; - } - return nullptr; - }; - - Constant *NewVal = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy); - if (!NewVal) { - LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; - } - - Val = NewVal; - LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); - } - } - - MutatedMemory[Ptr] = Val; - } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { - InstResult = ConstantExpr::get(BO->getOpcode(), - getVal(BO->getOperand(0)), - getVal(BO->getOperand(1))); - LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " - << *InstResult << "\n"); - } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { - InstResult = ConstantExpr::getCompare(CI->getPredicate(), - getVal(CI->getOperand(0)), - getVal(CI->getOperand(1))); - LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult - << "\n"); - } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { - InstResult = ConstantExpr::getCast(CI->getOpcode(), - getVal(CI->getOperand(0)), - CI->getType()); - LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult - << "\n"); - } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { - InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), - getVal(SI->getOperand(1)), - getVal(SI->getOperand(2))); - LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult - << "\n"); - } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { - InstResult = ConstantExpr::getExtractValue( - getVal(EVI->getAggregateOperand()), EVI->getIndices()); - LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " - << *InstResult << "\n"); - } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { - InstResult = ConstantExpr::getInsertValue( - getVal(IVI->getAggregateOperand()), - getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); - LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " - << *InstResult << "\n"); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { - Constant *P = getVal(GEP->getOperand(0)); - SmallVector<Constant*, 8> GEPOps; - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); - i != e; ++i) - GEPOps.push_back(getVal(*i)); - InstResult = - ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, - cast<GEPOperator>(GEP)->isInBounds()); - LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); - } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { - LLVM_DEBUG( - dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. - } - - Constant *Ptr = getVal(LI->getOperand(0)); - if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { - Ptr = FoldedPtr; - LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant " - "folding: " - << *Ptr << "\n"); - } - InstResult = ComputeLoadResult(Ptr); - if (!InstResult) { - LLVM_DEBUG( - dbgs() << "Failed to compute load result. Can not evaluate load." - "\n"); - return false; // Could not evaluate load. 
- } - - LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { - if (AI->isArrayAllocation()) { - LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); - return false; // Cannot handle array allocs. - } - Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back(llvm::make_unique<GlobalVariable>( - Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), - AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, - AI->getType()->getPointerAddressSpace())); - InstResult = AllocaTmps.back().get(); - LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); - } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { - CallSite CS(&*CurInst); - - // Debug info can safely be ignored here. - if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { - LLVM_DEBUG(dbgs() << "Ignoring debug info.\n"); - ++CurInst; - continue; - } - - // Cannot handle inline asm. - if (isa<InlineAsm>(CS.getCalledValue())) { - LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); - return false; - } - - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { - if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { - if (MSI->isVolatile()) { - LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset " - << "intrinsic.\n"); - return false; - } - Constant *Ptr = getVal(MSI->getDest()); - Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = ComputeLoadResult(getVal(Ptr)); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; - } - } - - if (II->isLifetimeStartOrEnd()) { - LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); - ++CurInst; - continue; - } - - if (II->getIntrinsicID() == Intrinsic::invariant_start) { - // We don't insert an entry into Values, as it doesn't have a - // meaningful return value. - if (!II->use_empty()) { - LLVM_DEBUG(dbgs() - << "Found unused invariant_start. Can't evaluate.\n"); - return false; - } - ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); - Value *PtrArg = getVal(II->getArgOperand(1)); - Value *Ptr = PtrArg->stripPointerCasts(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { - Type *ElemTy = GV->getValueType(); - if (!Size->isMinusOne() && - Size->getValue().getLimitedValue() >= - DL.getTypeStoreSize(ElemTy)) { - Invariants.insert(GV); - LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: " - << *GV << "\n"); - } else { - LLVM_DEBUG(dbgs() - << "Found a global var, but can not treat it as an " - "invariant.\n"); - } - } - // Continue even if we do nothing. - ++CurInst; - continue; - } else if (II->getIntrinsicID() == Intrinsic::assume) { - LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n"); - ++CurInst; - continue; - } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { - LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); - ++CurInst; - continue; - } - - LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); - return false; - } - - // Resolve function pointers. - SmallVector<Constant *, 8> Formals; - Function *Callee = getCalleeWithFormalArgs(CS, Formals); - if (!Callee || Callee->isInterposable()) { - LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); - return false; // Cannot resolve. - } - - if (Callee->isDeclaration()) { - // If this is a function we can constant fold, do it. 
- if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), - Callee, Formals, TLI)) { - InstResult = castCallResultIfNeeded(CS.getCalledValue(), C); - if (!InstResult) - return false; - LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " - << *InstResult << "\n"); - } else { - LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n"); - return false; - } - } else { - if (Callee->getFunctionType()->isVarArg()) { - LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); - return false; - } - - Constant *RetVal = nullptr; - // Execute the call, if successful, use the return value. - ValueStack.emplace_back(); - if (!EvaluateFunction(Callee, RetVal, Formals)) { - LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n"); - return false; - } - ValueStack.pop_back(); - InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal); - if (RetVal && !InstResult) - return false; - - if (InstResult) { - LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " - << *InstResult << "\n\n"); - } else { - LLVM_DEBUG(dbgs() - << "Successfully evaluated function. Result: 0\n\n"); - } - } - } else if (CurInst->isTerminator()) { - LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n"); - - if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { - if (BI->isUnconditional()) { - NextBB = BI->getSuccessor(0); - } else { - ConstantInt *Cond = - dyn_cast<ConstantInt>(getVal(BI->getCondition())); - if (!Cond) return false; // Cannot determine. - - NextBB = BI->getSuccessor(!Cond->getZExtValue()); - } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { - ConstantInt *Val = - dyn_cast<ConstantInt>(getVal(SI->getCondition())); - if (!Val) return false; // Cannot determine. - NextBB = SI->findCaseValue(Val)->getCaseSuccessor(); - } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { - Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); - if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) - NextBB = BA->getBasicBlock(); - else - return false; // Cannot determine. - } else if (isa<ReturnInst>(CurInst)) { - NextBB = nullptr; - } else { - // invoke, unwind, resume, unreachable. - LLVM_DEBUG(dbgs() << "Can not handle terminator."); - return false; // Cannot handle this terminator. - } - - // We succeeded at evaluating this block! - LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n"); - return true; - } else { - // Did not know how to evaluate this! - LLVM_DEBUG( - dbgs() << "Failed to evaluate block due to unhandled instruction." - "\n"); - return false; - } - - if (!CurInst->use_empty()) { - if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI)) - InstResult = FoldedInstResult; - - setVal(&*CurInst, InstResult); - } - - // If we just processed an invoke, we finished evaluating the block. - if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { - NextBB = II->getNormalDest(); - LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); - return true; - } - - // Advance program counter. - ++CurInst; - } -} - -/// Evaluate a call to function F, returning true if successful, false if we -/// can't evaluate it. ActualArgs contains the formal arguments for the -/// function. -bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, - const SmallVectorImpl<Constant*> &ActualArgs) { - // Check to see if this function is already executing (recursion). If so, - // bail out. TODO: we might want to accept limited recursion. 
- if (is_contained(CallStack, F))
- return false;
-
- CallStack.push_back(F);
-
- // Initialize arguments to the incoming values specified.
- unsigned ArgNo = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
- ++AI, ++ArgNo)
- setVal(&*AI, ActualArgs[ArgNo]);
-
- // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
- // we can evaluate any one basic block at most once. This set keeps
- // track of what we have executed so we can detect recursive cases etc.
- SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
- // CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = &F->front();
-
- BasicBlock::iterator CurInst = CurBB->begin();
-
- while (true) {
- BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
- LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
-
- if (!EvaluateBlock(CurInst, NextBB))
- return false;
-
- if (!NextBB) {
- // Successfully running until there's no next block means that we found
- // the return. Fill in the return value and pop the call stack.
- ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
- if (RI->getNumOperands())
- RetVal = getVal(RI->getOperand(0));
- CallStack.pop_back();
- return true;
- }
-
- // Okay, we succeeded in evaluating this control flow. See if we have
- // executed the new block before. If so, we have a looping function,
- // which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NextBB).second)
- return false; // looped!
-
- // Okay, we have never been in this block before. Check to see if there
- // are any PHI nodes. If so, evaluate them with information about where
- // we came from.
- PHINode *PN = nullptr;
- for (CurInst = NextBB->begin();
- (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
- setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
-
- // Advance to the next block.
- CurBB = NextBB;
- }
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
deleted file mode 100644
index 0c52e6f3703b..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ /dev/null
@@ -1,491 +0,0 @@
-//===- FlattenCFG.cpp - Code to perform CFG flattening --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Reduce conditional branches in the CFG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "flattencfg"
-
-namespace {
-
-class FlattenCFGOpt {
- AliasAnalysis *AA;
-
- /// Use parallel-and or parallel-or to generate conditions for
- /// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder);
-
- /// If \param BB is the merge block of an if-region, attempt to merge
- /// the if-region with an adjacent if-region upstream if the two if-regions
- /// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder);
-
- /// Compare a pair of blocks: \p Block1 and \p Block2, which
- /// are from two if-regions whose entry blocks are \p Head1 and \p
- /// Head2. \returns true if \p Block1 and \p Block2 contain identical
- /// instructions and contain no memory references that alias with \p Head2.
- /// This is used as a legality check for merging if-regions.
- bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
- BasicBlock *Block1, BasicBlock *Block2);
-
-public:
- FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
-
- bool run(BasicBlock *BB);
-};
-
-} // end anonymous namespace
-
-/// If \param [in] BB has more than one predecessor that is a conditional
-/// branch, attempt to use parallel and/or for the branch condition. \returns
-/// true on success.
-///
-/// Before:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// br i1 %cmp10, label %if.then, label %lor.rhs
-///
-/// lor.rhs:
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// br i1 %cmp11, label %if.then, label %if.end
-///
-/// if.end: // the merge block
-/// ......
-///
-/// if.then: // has two predecessors, both of which contain a conditional branch.
-/// ......
-/// br label %if.end;
-///
-/// After:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
-/// br i1 %cmp12, label %if.then, label %if.end
-///
-/// if.end:
-/// ......
-///
-/// if.then:
-/// ......
-/// br label %if.end;
-///
-/// Current implementation handles two cases.
-/// Case 1: \param BB is on the else-path.
-///
-/// BB1
-/// / |
-/// BB2 |
-/// / \ |
-/// BB3 \ | where BB1 and BB2 contain conditional branches.
-/// \ | / BB3 contains an unconditional branch.
-/// \ | / BB4 corresponds to \param BB which is also the merge.
-/// BB => BB4
-///
-///
-/// Corresponding source code:
-///
-/// if (a == b && c == d)
-/// statement; // BB3
-///
-/// Case 2: \param BB is on the then-path.
-///
-/// BB1
-/// / |
-/// | BB2
-/// \ / | where BB1 and BB2 contain conditional branches.
-/// BB => BB3 | BB3 contains an unconditional branch and corresponds
-/// \ / to \param BB. BB4 is the merge.
-/// BB4
-///
-/// Corresponding source code:
-///
-/// if (a == b || c == d)
-/// statement; // BB3
-///
-/// In both cases, \param BB is the common successor of conditional branches.
-/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
-/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
-/// as its predecessors.
-bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
- PHINode *PHI = dyn_cast<PHINode>(BB->begin());
- if (PHI)
- return false; // For simplicity, avoid cases containing PHI nodes.
-
- BasicBlock *LastCondBlock = nullptr;
- BasicBlock *FirstCondBlock = nullptr;
- BasicBlock *UnCondBlock = nullptr;
- int Idx = -1;
-
- // Check predecessors of \param BB.
- SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
-
- // All predecessors should terminate with a branch.
-
-/// If \param [in] BB has more than one predecessor that is a conditional
-/// branch, attempt to use parallel and/or for the branch condition. \returns
-/// true on success.
-///
-/// Before:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// br i1 %cmp10, label %if.then, label %lor.rhs
-///
-/// lor.rhs:
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// br i1 %cmp11, label %if.then, label %if.end
-///
-/// if.end: // the merge block
-/// ......
-///
-/// if.then: // has two predecessors, both of which contain a conditional branch.
-/// ......
-/// br label %if.end;
-///
-/// After:
-/// ......
-/// %cmp10 = fcmp une float %tmp1, %tmp2
-/// ......
-/// %cmp11 = fcmp une float %tmp3, %tmp4
-/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
-/// br i1 %cmp12, label %if.then, label %if.end
-///
-/// if.end:
-/// ......
-///
-/// if.then:
-/// ......
-/// br label %if.end;
-///
-/// Current implementation handles two cases.
-/// Case 1: \param BB is on the else-path.
-///
-///          BB1
-///        /     |
-///       BB2    |
-///      /   \   |
-///     BB3   \  |     where BB1, BB2 contain conditional branches.
-///      \    |  /     BB3 contains unconditional branch.
-///       \   | /      BB4 corresponds to \param BB which is also the merge.
-///  BB => BB4
-///
-/// Corresponding source code:
-///
-/// if (a == b && c == d)
-///   statement; // BB3
-///
-/// Case 2: \param BB is on the then-path.
-///
-///             BB1
-///          /      |
-///         |      BB2
-///          \    /      where BB1, BB2 contain conditional branches.
-///  BB =>   BB3         BB3 contains an unconditional branch and corresponds
-///           \  /       to \param BB. BB4 is the merge.
-///            BB4
-///
-/// Corresponding source code:
-///
-/// if (a == b || c == d)
-///   statement; // BB3
-///
-/// In both cases, \param BB is the common successor of conditional branches.
-/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
-/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
-/// as its predecessors.
-bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
-  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
-  if (PHI)
-    return false; // For simplicity, avoid cases containing PHI nodes.
-
-  BasicBlock *LastCondBlock = nullptr;
-  BasicBlock *FirstCondBlock = nullptr;
-  BasicBlock *UnCondBlock = nullptr;
-  int Idx = -1;
-
-  // Check predecessors of \param BB.
-  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
-  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
-       PI != PE; ++PI) {
-    BasicBlock *Pred = *PI;
-    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
-
-    // All predecessors should terminate with a branch.
-    if (!PBI)
-      return false;
-
-    BasicBlock *PP = Pred->getSinglePredecessor();
-
-    if (PBI->isUnconditional()) {
-      // Case 1: Pred (BB3) is an unconditional block; it should
-      // have a single predecessor (BB2) that is also a predecessor
-      // of \param BB (BB4) and should not have its address taken.
-      // There should exist only one such unconditional
-      // branch among the predecessors.
-      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
-          Pred->hasAddressTaken())
-        return false;
-
-      UnCondBlock = Pred;
-      continue;
-    }
-
-    // Only conditional branches are allowed beyond this point.
-    assert(PBI->isConditional());
-
-    // Condition's unique use should be the branch instruction.
-    Value *PC = PBI->getCondition();
-    if (!PC || !PC->hasOneUse())
-      return false;
-
-    if (PP && Preds.count(PP)) {
-      // These are internal condition blocks to be merged from, e.g.,
-      // BB2 in both cases.
-      // Should not have their address taken.
-      if (Pred->hasAddressTaken())
-        return false;
-
-      // Instructions in the internal condition blocks should be safe
-      // to hoist up.
-      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator();
-           BI != BE;) {
-        Instruction *CI = &*BI++;
-        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
-          return false;
-      }
-    } else {
-      // This is the condition block to be merged into, e.g. BB1 in
-      // both cases.
-      if (FirstCondBlock)
-        return false;
-      FirstCondBlock = Pred;
-    }
-
-    // Find whether BB is uniformly on the true (or false) path
-    // for all of its predecessors.
-    BasicBlock *PS1 = PBI->getSuccessor(0);
-    BasicBlock *PS2 = PBI->getSuccessor(1);
-    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
-    int CIdx = (PS1 == BB) ? 0 : 1;
-
-    if (Idx == -1)
-      Idx = CIdx;
-    else if (CIdx != Idx)
-      return false;
-
-    // PS is the successor which is not BB. Check successors to identify
-    // the last conditional branch.
-    if (Preds.count(PS) == 0) {
-      // Case 2.
-      LastCondBlock = Pred;
-    } else {
-      // Case 1.
-      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
-      if (BPS && BPS->isUnconditional()) {
-        // Case 1: PS (BB3) should end with an unconditional branch.
-        LastCondBlock = Pred;
-      }
-    }
-  }
-
-  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
-    return false;
-
-  Instruction *TBB = LastCondBlock->getTerminator();
-  BasicBlock *PS1 = TBB->getSuccessor(0);
-  BasicBlock *PS2 = TBB->getSuccessor(1);
-  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
-  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());
-
-  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
-  // attempt branch inversion.
-  if (!PBI1 || !PBI1->isUnconditional() ||
-      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
-    // Check whether PS2 jumps into PS1.
-    if (!PBI2 || !PBI2->isUnconditional() ||
-        (PS2->getTerminator()->getSuccessor(0) != PS1))
-      return false;
-
-    // Do branch inversion.
-    BasicBlock *CurrBlock = LastCondBlock;
-    bool EverChanged = false;
-    for (; CurrBlock != FirstCondBlock;
-         CurrBlock = CurrBlock->getSinglePredecessor()) {
-      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
-      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
-      if (!CI)
-        continue;
-
-      CmpInst::Predicate Predicate = CI->getPredicate();
-      // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq.
-      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
-        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
-        BI->swapSuccessors();
-        EverChanged = true;
-      }
-    }
-    return EverChanged;
-  }
-
-  // PS1 must end with an unconditional branch (into PS2), as established
-  // by the inversion check above.
-  if (!PBI1 || !PBI1->isUnconditional())
-    return false;
-
-  // PS2 should not contain a PHI node.
-  PHI = dyn_cast<PHINode>(PS2->begin());
-  if (PHI)
-    return false;
-
-  // Do the transformation.
-  BasicBlock *CB;
-  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
-  bool Iteration = true;
-  IRBuilder<>::InsertPointGuard Guard(Builder);
-  Value *PC = PBI->getCondition();
-
-  do {
-    CB = PBI->getSuccessor(1 - Idx);
-    // Delete the conditional branch.
-    FirstCondBlock->getInstList().pop_back();
-    FirstCondBlock->getInstList()
-        .splice(FirstCondBlock->end(), CB->getInstList());
-    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
-    Value *CC = PBI->getCondition();
-    // Merge conditions.
-    Builder.SetInsertPoint(PBI);
-    Value *NC;
-    if (Idx == 0)
-      // Case 2, use parallel or.
-      NC = Builder.CreateOr(PC, CC);
-    else
-      // Case 1, use parallel and.
-      NC = Builder.CreateAnd(PC, CC);
-
-    PBI->replaceUsesOfWith(CC, NC);
-    PC = NC;
-    if (CB == LastCondBlock)
-      Iteration = false;
-    // Remove internal conditional branches.
-    CB->dropAllReferences();
-    // Make CB unreachable and let a downstream pass delete the block.
-    new UnreachableInst(CB->getContext(), CB);
-  } while (Iteration);
-
-  LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
-  return true;
-}
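The rewrite inside the do-while loop above is compact enough to isolate. A simplified sketch of just the condition-merging step, assuming a conditional BranchInst and an IRBuilder supplied by the caller (mergeConditionAsOr is a hypothetical helper, not part of this file):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include <cassert>

    // Fold an extra condition into an existing conditional branch as
    // parallel-or: the branch now fires if either condition holds.
    static void mergeConditionAsOr(llvm::BranchInst *BI, llvm::Value *ExtraCond,
                                   llvm::IRBuilder<> &Builder) {
      assert(BI->isConditional() && "expected a conditional branch");
      llvm::Value *OldCond = BI->getCondition();
      Builder.SetInsertPoint(BI);
      llvm::Value *Merged = Builder.CreateOr(OldCond, ExtraCond);
      BI->setCondition(Merged);
    }

The real code instead uses replaceUsesOfWith because it folds conditions iteratively and the old condition may still have the new one as a user; the sketch shows only the single-step shape.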
-
-/// Compare blocks from two if-regions, where \param Head1 is the entry of the
-/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
-/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
-/// in the 2nd if-region to compare. \returns true if \param Block1 and \param
-/// Block2 contain identical instructions and have no memory accesses that
-/// alias with \param Head2.
-bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
-                                         BasicBlock *Block1,
-                                         BasicBlock *Block2) {
-  Instruction *PTI2 = Head2->getTerminator();
-  Instruction *PBI2 = &Head2->front();
-
-  bool eq1 = (Block1 == Head1);
-  bool eq2 = (Block2 == Head2);
-  if (eq1 || eq2) {
-    // An empty then-path or else-path.
-    return (eq1 == eq2);
-  }
-
-  // Check whether instructions in Block1 and Block2 are identical
-  // and do not alias with instructions in Head2.
-  BasicBlock::iterator iter1 = Block1->begin();
-  BasicBlock::iterator end1 = Block1->getTerminator()->getIterator();
-  BasicBlock::iterator iter2 = Block2->begin();
-  BasicBlock::iterator end2 = Block2->getTerminator()->getIterator();
-
-  while (true) {
-    if (iter1 == end1) {
-      if (iter2 != end2)
-        return false;
-      break;
-    }
-
-    if (!iter1->isIdenticalTo(&*iter2))
-      return false;
-
-    // It is illegal to remove instructions with side effects except
-    // non-volatile stores.
-    if (iter1->mayHaveSideEffects()) {
-      Instruction *CurI = &*iter1;
-      StoreInst *SI = dyn_cast<StoreInst>(CurI);
-      if (!SI || SI->isVolatile())
-        return false;
-    }
-
-    // For simplicity and speed, reject any read from memory rather than
-    // performing a data dependency check.
-    if (iter1->mayReadFromMemory())
-      return false;
-
-    if (iter1->mayWriteToMemory()) {
-      for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
-        if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
-          // Check alias with Head2.
-          if (!AA || AA->alias(&*iter1, &*BI))
-            return false;
-        }
-      }
-    }
-    ++iter1;
-    ++iter2;
-  }
-
-  return true;
-}
-
-/// Check whether \param BB is the merge block of an if-region. If yes, check
-/// whether there exists an adjacent if-region upstream such that the two
-/// if-regions contain identical instructions and can be legally merged.
-/// \returns true if the two if-regions are merged.
-///
-/// From:
-/// if (a)
-///   statement;
-/// if (b)
-///   statement;
-///
-/// To:
-/// if (a || b)
-///   statement;
-bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
-  BasicBlock *IfTrue2, *IfFalse2;
-  Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
-  Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
-  if (!CInst2)
-    return false;
-
-  BasicBlock *SecondEntryBlock = CInst2->getParent();
-  if (SecondEntryBlock->hasAddressTaken())
-    return false;
-
-  BasicBlock *IfTrue1, *IfFalse1;
-  Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
-  Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
-  if (!CInst1)
-    return false;
-
-  BasicBlock *FirstEntryBlock = CInst1->getParent();
-
-  // Either the then-path or the else-path should be empty.
-  if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
-    return false;
-  if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
-    return false;
-
-  Instruction *PTI2 = SecondEntryBlock->getTerminator();
-  Instruction *PBI2 = &SecondEntryBlock->front();
-
-  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
-                            IfTrue2))
-    return false;
-
-  if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1,
-                            IfFalse2))
-    return false;
-
-  // Check whether \param SecondEntryBlock has side effects and whether it is
-  // safe to speculate.
-  for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
-    Instruction *CI = &*BI;
-    if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
-        !isSafeToSpeculativelyExecute(CI))
-      return false;
-  }
-
-  // Merge \param SecondEntryBlock into \param FirstEntryBlock.
-  FirstEntryBlock->getInstList().pop_back();
-  FirstEntryBlock->getInstList()
-      .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
-  BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
-  Value *CC = PBI->getCondition();
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
-  Builder.SetInsertPoint(PBI);
-  Value *NC = Builder.CreateOr(CInst1, CC);
-  PBI->replaceUsesOfWith(CC, NC);
-  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
-
-  // Remove IfTrue1.
-  if (IfTrue1 != FirstEntryBlock) {
-    IfTrue1->dropAllReferences();
-    IfTrue1->eraseFromParent();
-  }
-
-  // Remove IfFalse1.
-  if (IfFalse1 != FirstEntryBlock) {
-    IfFalse1->dropAllReferences();
-    IfFalse1->eraseFromParent();
-  }
-
-  // Remove \param SecondEntryBlock.
-  SecondEntryBlock->dropAllReferences();
-  SecondEntryBlock->eraseFromParent();
-  LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
-  return true;
-}
-
-bool FlattenCFGOpt::run(BasicBlock *BB) {
-  assert(BB && BB->getParent() && "Block not embedded in function!");
-  assert(BB->getTerminator() && "Degenerate basic block encountered!");
-
-  IRBuilder<> Builder(BB);
-
-  if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
-    return true;
-  return false;
-}
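run() handles a single block, so a caller has to iterate; a successful flatten can also erase blocks behind the current one. A sketch of how a driver might apply the entry point defined next to a whole function, assuming the FlattenCFG declaration from llvm/Transforms/Utils/Local.h (which this file includes); iterativelyFlattenCFG is illustrative, not the actual pass:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Local.h" // declares llvm::FlattenCFG here

    static bool iterativelyFlattenCFG(llvm::Function &F,
                                      llvm::AliasAnalysis *AA) {
      bool EverChanged = false;
      bool LocalChange = true;
      while (LocalChange) {
        LocalChange = false;
        // Advance the iterator before transforming, since FlattenCFG may
        // erase the block (and its neighbors) on success.
        for (auto BBIt = F.begin(); BBIt != F.end();)
          if (llvm::FlattenCFG(&*BBIt++, AA))
            LocalChange = true;
        EverChanged |= LocalChange;
      }
      return EverChanged;
    }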
-
-/// FlattenCFG - This function is used to flatten a CFG. For
-/// example, it uses parallel-and and parallel-or mode to collapse
-/// if-conditions and merge if-regions with identical statements.
-bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) {
-  return FlattenCFGOpt(AA).run(BB);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
deleted file mode 100644
index a9b28754c8e9..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ /dev/null
@@ -1,948 +0,0 @@
-//===- FunctionComparator.cpp - Function Comparator -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the FunctionComparator and GlobalNumberState classes
-// which are used by the MergeFunctions pass for comparing functions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/FunctionComparator.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "functioncomparator"
-
-int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
-  if (L < R) return -1;
-  if (L > R) return 1;
-  return 0;
-}
-
-int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
-  if ((int)L < (int)R) return -1;
-  if ((int)L > (int)R) return 1;
-  return 0;
-}
-
-int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
-  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
-    return Res;
-  if (L.ugt(R)) return 1;
-  if (R.ugt(L)) return -1;
-  return 0;
-}
-
-int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
-  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
-  // then by value interpreted as a bitstring (aka APInt).
- const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); - if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), - APFloat::semanticsPrecision(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), - APFloat::semanticsMaxExponent(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), - APFloat::semanticsMinExponent(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), - APFloat::semanticsSizeInBits(SR))) - return Res; - return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); -} - -int FunctionComparator::cmpMem(StringRef L, StringRef R) const { - // Prevent heavy comparison, compare sizes first. - if (int Res = cmpNumbers(L.size(), R.size())) - return Res; - - // Compare strings lexicographically only when it is necessary: only when - // strings are equal in size. - return L.compare(R); -} - -int FunctionComparator::cmpAttrs(const AttributeList L, - const AttributeList R) const { - if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) - return Res; - - for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) { - AttributeSet LAS = L.getAttributes(i); - AttributeSet RAS = R.getAttributes(i); - AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); - AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); - for (; LI != LE && RI != RE; ++LI, ++RI) { - Attribute LA = *LI; - Attribute RA = *RI; - if (LA.isTypeAttribute() && RA.isTypeAttribute()) { - if (LA.getKindAsEnum() != RA.getKindAsEnum()) - return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum()); - - Type *TyL = LA.getValueAsType(); - Type *TyR = RA.getValueAsType(); - if (TyL && TyR) - return cmpTypes(TyL, TyR); - - // Two pointers, at least one null, so the comparison result is - // independent of the value of a real pointer. - return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); - } - if (LA < RA) - return -1; - if (RA < LA) - return 1; - } - if (LI != LE) - return 1; - if (RI != RE) - return -1; - } - return 0; -} - -int FunctionComparator::cmpRangeMetadata(const MDNode *L, - const MDNode *R) const { - if (L == R) - return 0; - if (!L) - return -1; - if (!R) - return 1; - // Range metadata is a sequence of numbers. Make sure they are the same - // sequence. - // TODO: Note that as this is metadata, it is possible to drop and/or merge - // this data when considering functions to merge. Thus this comparison would - // return 0 (i.e. equivalent), but merging would become more complicated - // because the ranges would need to be unioned. It is not likely that - // functions differ ONLY in this metadata if they are actually the same - // function semantically. 
- if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) - return Res; - for (size_t I = 0; I < L->getNumOperands(); ++I) { - ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); - ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); - if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) - return Res; - } - return 0; -} - -int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, - const Instruction *R) const { - ImmutableCallSite LCS(L); - ImmutableCallSite RCS(R); - - assert(LCS && RCS && "Must be calls or invokes!"); - assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!"); - - if (int Res = - cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) - return Res; - - for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) { - auto OBL = LCS.getOperandBundleAt(i); - auto OBR = RCS.getOperandBundleAt(i); - - if (int Res = OBL.getTagName().compare(OBR.getTagName())) - return Res; - - if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size())) - return Res; - } - - return 0; -} - -/// Constants comparison: -/// 1. Check whether type of L constant could be losslessly bitcasted to R -/// type. -/// 2. Compare constant contents. -/// For more details see declaration comments. -int FunctionComparator::cmpConstants(const Constant *L, - const Constant *R) const { - Type *TyL = L->getType(); - Type *TyR = R->getType(); - - // Check whether types are bitcastable. This part is just re-factored - // Type::canLosslesslyBitCastTo method, but instead of returning true/false, - // we also pack into result which type is "less" for us. - int TypesRes = cmpTypes(TyL, TyR); - if (TypesRes != 0) { - // Types are different, but check whether we can bitcast them. - if (!TyL->isFirstClassType()) { - if (TyR->isFirstClassType()) - return -1; - // Neither TyL nor TyR are values of first class type. Return the result - // of comparing the types - return TypesRes; - } - if (!TyR->isFirstClassType()) { - if (TyL->isFirstClassType()) - return 1; - return TypesRes; - } - - // Vector -> Vector conversions are always lossless if the two vector types - // have the same size, otherwise not. - unsigned TyLWidth = 0; - unsigned TyRWidth = 0; - - if (auto *VecTyL = dyn_cast<VectorType>(TyL)) - TyLWidth = VecTyL->getBitWidth(); - if (auto *VecTyR = dyn_cast<VectorType>(TyR)) - TyRWidth = VecTyR->getBitWidth(); - - if (TyLWidth != TyRWidth) - return cmpNumbers(TyLWidth, TyRWidth); - - // Zero bit-width means neither TyL nor TyR are vectors. - if (!TyLWidth) { - PointerType *PTyL = dyn_cast<PointerType>(TyL); - PointerType *PTyR = dyn_cast<PointerType>(TyR); - if (PTyL && PTyR) { - unsigned AddrSpaceL = PTyL->getAddressSpace(); - unsigned AddrSpaceR = PTyR->getAddressSpace(); - if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) - return Res; - } - if (PTyL) - return 1; - if (PTyR) - return -1; - - // TyL and TyR aren't vectors, nor pointers. We don't know how to - // bitcast them. - return TypesRes; - } - } - - // OK, types are bitcastable, now check constant contents. 
-
-  if (L->isNullValue() && R->isNullValue())
-    return TypesRes;
-  if (L->isNullValue() && !R->isNullValue())
-    return 1;
-  if (!L->isNullValue() && R->isNullValue())
-    return -1;
-
-  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
-  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
-  if (GlobalValueL && GlobalValueR) {
-    return cmpGlobalValues(GlobalValueL, GlobalValueR);
-  }
-
-  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
-    return Res;
-
-  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
-    const auto *SeqR = cast<ConstantDataSequential>(R);
-    // This handles ConstantDataArray and ConstantDataVector. Note that we
-    // compare the two raw data arrays, which might differ depending on the
-    // host endianness. This isn't a problem though, because the endianness of
-    // a module will affect the order of the constants, but this order is the
-    // same for a given input module and host platform.
-    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
-  }
-
-  switch (L->getValueID()) {
-  case Value::UndefValueVal:
-  case Value::ConstantTokenNoneVal:
-    return TypesRes;
-  case Value::ConstantIntVal: {
-    const APInt &LInt = cast<ConstantInt>(L)->getValue();
-    const APInt &RInt = cast<ConstantInt>(R)->getValue();
-    return cmpAPInts(LInt, RInt);
-  }
-  case Value::ConstantFPVal: {
-    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
-    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
-    return cmpAPFloats(LAPF, RAPF);
-  }
-  case Value::ConstantArrayVal: {
-    const ConstantArray *LA = cast<ConstantArray>(L);
-    const ConstantArray *RA = cast<ConstantArray>(R);
-    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
-    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
-    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
-      return Res;
-    for (uint64_t i = 0; i < NumElementsL; ++i) {
-      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
-                                 cast<Constant>(RA->getOperand(i))))
-        return Res;
-    }
-    return 0;
-  }
-  case Value::ConstantStructVal: {
-    const ConstantStruct *LS = cast<ConstantStruct>(L);
-    const ConstantStruct *RS = cast<ConstantStruct>(R);
-    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
-    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
-    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
-      return Res;
-    for (unsigned i = 0; i != NumElementsL; ++i) {
-      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
-                                 cast<Constant>(RS->getOperand(i))))
-        return Res;
-    }
-    return 0;
-  }
-  case Value::ConstantVectorVal: {
-    const ConstantVector *LV = cast<ConstantVector>(L);
-    const ConstantVector *RV = cast<ConstantVector>(R);
-    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
-    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
-    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
-      return Res;
-    for (uint64_t i = 0; i < NumElementsL; ++i) {
-      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
-                                 cast<Constant>(RV->getOperand(i))))
-        return Res;
-    }
-    return 0;
-  }
-  case Value::ConstantExprVal: {
-    const ConstantExpr *LE = cast<ConstantExpr>(L);
-    const ConstantExpr *RE = cast<ConstantExpr>(R);
-    unsigned NumOperandsL = LE->getNumOperands();
-    unsigned NumOperandsR = RE->getNumOperands();
-    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
-      return Res;
-    for (unsigned i = 0; i < NumOperandsL; ++i) {
-      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
-                                 cast<Constant>(RE->getOperand(i))))
-        return Res;
-    }
-    return 0;
-  }
-  case Value::BlockAddressVal: {
-    const BlockAddress *LBA = cast<BlockAddress>(L);
-    const BlockAddress *RBA = cast<BlockAddress>(R);
-    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
-      return Res;
-    if (LBA->getFunction() == RBA->getFunction()) {
-      // They are BBs in the same function. Order by which comes first in the
-      // BB order of the function. This order is deterministic.
-      Function *F = LBA->getFunction();
-      BasicBlock *LBB = LBA->getBasicBlock();
-      BasicBlock *RBB = RBA->getBasicBlock();
-      if (LBB == RBB)
-        return 0;
-      for (BasicBlock &BB : F->getBasicBlockList()) {
-        if (&BB == LBB) {
-          assert(&BB != RBB);
-          return -1;
-        }
-        if (&BB == RBB)
-          return 1;
-      }
-      llvm_unreachable("Basic Block Address does not point to a basic block in "
-                       "its function.");
-      return -1;
-    } else {
-      // cmpValues said the functions are the same. So because they aren't
-      // literally the same pointer, they must respectively be the left and
-      // right functions.
-      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
-      // cmpValues will tell us if these are equivalent BasicBlocks, in the
-      // context of their respective functions.
-      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
-    }
-  }
-  default: // Unknown constant, abort.
-    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
-    llvm_unreachable("Constant ValueID not recognized.");
-    return -1;
-  }
-}
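cmpGlobalValues, defined next, orders globals by numbers handed out by a shared GlobalNumberState rather than by pointer value, so the resulting order is deterministic across runs. A self-contained sketch of that number-on-first-use scheme (NumberState here is illustrative, not the actual class):

    #include <cstdint>
    #include <map>

    // Assign consecutive IDs in encounter order; comparing IDs then yields a
    // stable total order that is independent of allocation addresses.
    template <typename T> class NumberState {
      std::map<const T *, uint64_t> Numbers;
      uint64_t NextNumber = 0;

    public:
      uint64_t getNumber(const T *V) {
        auto Inserted = Numbers.emplace(V, NextNumber);
        if (Inserted.second)
          ++NextNumber; // first time this value has been seen
        return Inserted.first->second;
      }
    };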
-
-int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
-  uint64_t LNumber = GlobalNumbers->getNumber(L);
-  uint64_t RNumber = GlobalNumbers->getNumber(R);
-  return cmpNumbers(LNumber, RNumber);
-}
-
-/// cmpType - compares two types,
-/// defines a total ordering among the set of types.
-/// See method declaration comments for more details.
-int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-  PointerType *PTyL = dyn_cast<PointerType>(TyL);
-  PointerType *PTyR = dyn_cast<PointerType>(TyR);
-
-  const DataLayout &DL = FnL->getParent()->getDataLayout();
-  if (PTyL && PTyL->getAddressSpace() == 0)
-    TyL = DL.getIntPtrType(TyL);
-  if (PTyR && PTyR->getAddressSpace() == 0)
-    TyR = DL.getIntPtrType(TyR);
-
-  if (TyL == TyR)
-    return 0;
-
-  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
-    return Res;
-
-  switch (TyL->getTypeID()) {
-  default:
-    llvm_unreachable("Unknown type!");
-  case Type::IntegerTyID:
-    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
-                      cast<IntegerType>(TyR)->getBitWidth());
-  // TyL == TyR would have returned 0 earlier, because types are uniqued.
- case Type::VoidTyID: - case Type::FloatTyID: - case Type::DoubleTyID: - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - case Type::MetadataTyID: - case Type::TokenTyID: - return 0; - - case Type::PointerTyID: - assert(PTyL && PTyR && "Both types must be pointers here."); - return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); - - case Type::StructTyID: { - StructType *STyL = cast<StructType>(TyL); - StructType *STyR = cast<StructType>(TyR); - if (STyL->getNumElements() != STyR->getNumElements()) - return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); - - if (STyL->isPacked() != STyR->isPacked()) - return cmpNumbers(STyL->isPacked(), STyR->isPacked()); - - for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { - if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i))) - return Res; - } - return 0; - } - - case Type::FunctionTyID: { - FunctionType *FTyL = cast<FunctionType>(TyL); - FunctionType *FTyR = cast<FunctionType>(TyR); - if (FTyL->getNumParams() != FTyR->getNumParams()) - return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams()); - - if (FTyL->isVarArg() != FTyR->isVarArg()) - return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); - - if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType())) - return Res; - - for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { - if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i))) - return Res; - } - return 0; - } - - case Type::ArrayTyID: - case Type::VectorTyID: { - auto *STyL = cast<SequentialType>(TyL); - auto *STyR = cast<SequentialType>(TyR); - if (STyL->getNumElements() != STyR->getNumElements()) - return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); - return cmpTypes(STyL->getElementType(), STyR->getElementType()); - } - } -} - -// Determine whether the two operations are the same except that pointer-to-A -// and pointer-to-B are equivalent. This should be kept in sync with -// Instruction::isSameOperationAs. -// Read method declaration comments for more details. -int FunctionComparator::cmpOperations(const Instruction *L, - const Instruction *R, - bool &needToCmpOperands) const { - needToCmpOperands = true; - if (int Res = cmpValues(L, R)) - return Res; - - // Differences from Instruction::isSameOperationAs: - // * replace type comparison with calls to cmpTypes. - // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top. - // * because of the above, we don't test for the tail bit on calls later on. - if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) - return Res; - - if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) { - needToCmpOperands = false; - const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R); - if (int Res = - cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) - return Res; - return cmpGEPs(GEPL, GEPR); - } - - if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) - return Res; - - if (int Res = cmpTypes(L->getType(), R->getType())) - return Res; - - if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), - R->getRawSubclassOptionalData())) - return Res; - - // We have two instructions of identical opcode and #operands. 
Check to see - // if all operands are the same type - for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { - if (int Res = - cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType())) - return Res; - } - - // Check special state that is a part of some instructions. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { - if (int Res = cmpTypes(AI->getAllocatedType(), - cast<AllocaInst>(R)->getAllocatedType())) - return Res; - return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()); - } - if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { - if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) - return Res; - if (int Res = - cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) - return Res; - if (int Res = - cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) - return Res; - if (int Res = cmpNumbers(LI->getSyncScopeID(), - cast<LoadInst>(R)->getSyncScopeID())) - return Res; - return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), - cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); - } - if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { - if (int Res = - cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) - return Res; - if (int Res = - cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) - return Res; - if (int Res = - cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) - return Res; - return cmpNumbers(SI->getSyncScopeID(), - cast<StoreInst>(R)->getSyncScopeID()); - } - if (const CmpInst *CI = dyn_cast<CmpInst>(L)) - return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); - if (auto CSL = CallSite(const_cast<Instruction *>(L))) { - auto CSR = CallSite(const_cast<Instruction *>(R)); - if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv())) - return Res; - if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes())) - return Res; - if (int Res = cmpOperandBundlesSchema(L, R)) - return Res; - if (const CallInst *CI = dyn_cast<CallInst>(L)) - if (int Res = cmpNumbers(CI->getTailCallKind(), - cast<CallInst>(R)->getTailCallKind())) - return Res; - return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range), - R->getMetadata(LLVMContext::MD_range)); - } - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { - ArrayRef<unsigned> LIndices = IVI->getIndices(); - ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); - if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) - return Res; - for (size_t i = 0, e = LIndices.size(); i != e; ++i) { - if (int Res = cmpNumbers(LIndices[i], RIndices[i])) - return Res; - } - return 0; - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { - ArrayRef<unsigned> LIndices = EVI->getIndices(); - ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); - if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) - return Res; - for (size_t i = 0, e = LIndices.size(); i != e; ++i) { - if (int Res = cmpNumbers(LIndices[i], RIndices[i])) - return Res; - } - } - if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { - if (int Res = - cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) - return Res; - return cmpNumbers(FI->getSyncScopeID(), - cast<FenceInst>(R)->getSyncScopeID()); - } - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { - if (int Res = cmpNumbers(CXI->isVolatile(), - cast<AtomicCmpXchgInst>(R)->isVolatile())) - return Res; - if (int Res = cmpNumbers(CXI->isWeak(), - 
cast<AtomicCmpXchgInst>(R)->isWeak())) - return Res; - if (int Res = - cmpOrderings(CXI->getSuccessOrdering(), - cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) - return Res; - if (int Res = - cmpOrderings(CXI->getFailureOrdering(), - cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) - return Res; - return cmpNumbers(CXI->getSyncScopeID(), - cast<AtomicCmpXchgInst>(R)->getSyncScopeID()); - } - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { - if (int Res = cmpNumbers(RMWI->getOperation(), - cast<AtomicRMWInst>(R)->getOperation())) - return Res; - if (int Res = cmpNumbers(RMWI->isVolatile(), - cast<AtomicRMWInst>(R)->isVolatile())) - return Res; - if (int Res = cmpOrderings(RMWI->getOrdering(), - cast<AtomicRMWInst>(R)->getOrdering())) - return Res; - return cmpNumbers(RMWI->getSyncScopeID(), - cast<AtomicRMWInst>(R)->getSyncScopeID()); - } - if (const PHINode *PNL = dyn_cast<PHINode>(L)) { - const PHINode *PNR = cast<PHINode>(R); - // Ensure that in addition to the incoming values being identical - // (checked by the caller of this function), the incoming blocks - // are also identical. - for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) { - if (int Res = - cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i))) - return Res; - } - } - return 0; -} - -// Determine whether two GEP operations perform the same underlying arithmetic. -// Read method declaration comments for more details. -int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, - const GEPOperator *GEPR) const { - unsigned int ASL = GEPL->getPointerAddressSpace(); - unsigned int ASR = GEPR->getPointerAddressSpace(); - - if (int Res = cmpNumbers(ASL, ASR)) - return Res; - - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - const DataLayout &DL = FnL->getParent()->getDataLayout(); - unsigned BitWidth = DL.getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); - if (GEPL->accumulateConstantOffset(DL, OffsetL) && - GEPR->accumulateConstantOffset(DL, OffsetR)) - return cmpAPInts(OffsetL, OffsetR); - if (int Res = cmpTypes(GEPL->getSourceElementType(), - GEPR->getSourceElementType())) - return Res; - - if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) - return Res; - - for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { - if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) - return Res; - } - - return 0; -} - -int FunctionComparator::cmpInlineAsm(const InlineAsm *L, - const InlineAsm *R) const { - // InlineAsm's are uniqued. If they are the same pointer, obviously they are - // the same, otherwise compare the fields. - if (L == R) - return 0; - if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) - return Res; - if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) - return Res; - if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) - return Res; - if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) - return Res; - if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) - return Res; - if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) - return Res; - assert(L->getFunctionType() != R->getFunctionType()); - return 0; -} - -/// Compare two values used by the two functions under pair-wise comparison. If -/// this is the first time the values are seen, they're added to the mapping so -/// that we will detect mismatches on next use. -/// See comments in declaration for more details. 
-int FunctionComparator::cmpValues(const Value *L, const Value *R) const { - // Catch self-reference case. - if (L == FnL) { - if (R == FnR) - return 0; - return -1; - } - if (R == FnR) { - if (L == FnL) - return 0; - return 1; - } - - const Constant *ConstL = dyn_cast<Constant>(L); - const Constant *ConstR = dyn_cast<Constant>(R); - if (ConstL && ConstR) { - if (L == R) - return 0; - return cmpConstants(ConstL, ConstR); - } - - if (ConstL) - return 1; - if (ConstR) - return -1; - - const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); - const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); - - if (InlineAsmL && InlineAsmR) - return cmpInlineAsm(InlineAsmL, InlineAsmR); - if (InlineAsmL) - return 1; - if (InlineAsmR) - return -1; - - auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), - RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); - - return cmpNumbers(LeftSN.first->second, RightSN.first->second); -} - -// Test whether two basic blocks have equivalent behaviour. -int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, - const BasicBlock *BBR) const { - BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); - BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); - - do { - bool needToCmpOperands = true; - if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands)) - return Res; - if (needToCmpOperands) { - assert(InstL->getNumOperands() == InstR->getNumOperands()); - - for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { - Value *OpL = InstL->getOperand(i); - Value *OpR = InstR->getOperand(i); - if (int Res = cmpValues(OpL, OpR)) - return Res; - // cmpValues should ensure this is true. - assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); - } - } - - ++InstL; - ++InstR; - } while (InstL != InstLE && InstR != InstRE); - - if (InstL != InstLE && InstR == InstRE) - return 1; - if (InstL == InstLE && InstR != InstRE) - return -1; - return 0; -} - -int FunctionComparator::compareSignature() const { - if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) - return Res; - - if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) - return Res; - - if (FnL->hasGC()) { - if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) - return Res; - } - - if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) - return Res; - - if (FnL->hasSection()) { - if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) - return Res; - } - - if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) - return Res; - - // TODO: if it's internal and only used in direct calls, we could handle this - // case too. - if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) - return Res; - - if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) - return Res; - - assert(FnL->arg_size() == FnR->arg_size() && - "Identically typed functions have different numbers of args!"); - - // Visit the arguments so that they get enumerated in the order they're - // passed in. - for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), - ArgRI = FnR->arg_begin(), - ArgLE = FnL->arg_end(); - ArgLI != ArgLE; ++ArgLI, ++ArgRI) { - if (cmpValues(&*ArgLI, &*ArgRI) != 0) - llvm_unreachable("Arguments repeat!"); - } - return 0; -} - -// Test whether the two functions have equivalent behaviour. -int FunctionComparator::compare() { - beginCompare(); - - if (int Res = compareSignature()) - return Res; - - // We do a CFG-ordered walk since the actual ordering of the blocks in the - // linked list is immaterial. 
Our walk starts at the entry block for both
-  // functions, then takes each block from each terminator in order. As an
-  // artifact, this also means that unreachable blocks are ignored.
-  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
-  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
-
-  FnLBBs.push_back(&FnL->getEntryBlock());
-  FnRBBs.push_back(&FnR->getEntryBlock());
-
-  VisitedBBs.insert(FnLBBs[0]);
-  while (!FnLBBs.empty()) {
-    const BasicBlock *BBL = FnLBBs.pop_back_val();
-    const BasicBlock *BBR = FnRBBs.pop_back_val();
-
-    if (int Res = cmpValues(BBL, BBR))
-      return Res;
-
-    if (int Res = cmpBasicBlocks(BBL, BBR))
-      return Res;
-
-    const Instruction *TermL = BBL->getTerminator();
-    const Instruction *TermR = BBR->getTerminator();
-
-    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
-    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
-      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
-        continue;
-
-      FnLBBs.push_back(TermL->getSuccessor(i));
-      FnRBBs.push_back(TermR->getSuccessor(i));
-    }
-  }
-  return 0;
-}
-
-namespace {
-
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64-bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
-  uint64_t Hash;
-
-public:
-  // Initialize to random constant, so the state isn't zero.
-  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
-
-  void add(uint64_t V) {
-    Hash = hashing::detail::hash_16_bytes(Hash, V);
-  }
-
-  // No finishing is required, because the entire hash value is used.
-  uint64_t getHash() { return Hash; }
-};
-
-} // end anonymous namespace
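functionHash, defined next, feeds this accumulator a stream of small integers. A self-contained sketch of the same order-sensitive scheme, with a generic mixer swapped in for LLVM's internal hash_16_bytes (Hash64 and its mixing constant are illustrative, not the real implementation):

    #include <cstdint>
    #include <vector>

    struct Hash64 {
      uint64_t H = 0x6acaa36bef8325c5ULL; // non-zero seed, as above
      void add(uint64_t V) {
        // Any reasonable 64-bit mixer will do for a sketch.
        H ^= V + 0x9e3779b97f4a7c15ULL + (H << 6) + (H >> 2);
      }
    };

    // Hash per-block opcode streams, with a sentinel per block so that the
    // partition of opcodes into blocks changes the result, not just their
    // overall order.
    uint64_t hashOpcodeStream(const std::vector<std::vector<unsigned>> &Blocks) {
      Hash64 H;
      for (const auto &BB : Blocks) {
        H.add(45798); // block-header sentinel, mirroring functionHash
        for (unsigned Opcode : BB)
          H.add(Opcode);
      }
      return H.H;
    }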
-
-// A function hash is calculated by considering only the number of arguments
-// and whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it
-// useful when possibly merging functions which are the same modulo constants
-// and call targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
-  HashAccumulator64 H;
-  H.add(F.isVarArg());
-  H.add(F.arg_size());
-
-  SmallVector<const BasicBlock *, 8> BBs;
-  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
-
-  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
-  // accumulating the hash of the function "structure" (BB and opcode
-  // sequence).
-  BBs.push_back(&F.getEntryBlock());
-  VisitedBBs.insert(BBs[0]);
-  while (!BBs.empty()) {
-    const BasicBlock *BB = BBs.pop_back_val();
-    // This random value acts as a block header, as otherwise the partition of
-    // opcodes into BBs wouldn't affect the hash, only the order of the
-    // opcodes.
-    H.add(45798);
-    for (auto &Inst : *BB) {
-      H.add(Inst.getOpcode());
-    }
-    const Instruction *Term = BB->getTerminator();
-    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
-        continue;
-      BBs.push_back(Term->getSuccessor(i));
-    }
-  }
-  return H.getHash();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
deleted file mode 100644
index c9cc0990f237..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ /dev/null
@@ -1,313 +0,0 @@
-//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the FunctionImportGlobalProcessing class, used
-// to perform the necessary global value handling for function importing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-#include "llvm/IR/InstIterator.h"
-using namespace llvm;
-
-/// Checks if we should import SGV as a definition; otherwise import it as a
-/// declaration.
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
-    const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
-
-  // Only import the globals requested for importing.
-  if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
-    return false;
-
-  assert(!isa<GlobalAlias>(SGV) &&
-         "Unexpected global alias in the import list.");
-
-  // Otherwise yes.
-  return true;
-}
-
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
-    const GlobalValue *SGV) {
-  if (!isPerformingImport())
-    return false;
-  return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
-                                                              GlobalsToImport);
-}
-
-bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
-    const GlobalValue *SGV) {
-  assert(SGV->hasLocalLinkage());
-  // Both the imported references and the original local variable must
-  // be promoted.
-  if (!isPerformingImport() && !isModuleExporting())
-    return false;
-
-  if (isPerformingImport()) {
-    assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
-            !isNonRenamableLocal(*SGV)) &&
-           "Attempting to promote non-renamable local");
-    // We don't know for sure yet if we are importing this value (as either
-    // a reference or a def), since we are simply walking all values in the
-    // module. But by necessity if we end up importing it and it is local,
-    // it must be promoted, so unconditionally promote all values in the
-    // importing module.
-    return true;
-  }
-
-  // When exporting, consult the index. We can have more than one local
-  // with the same GUID, in the case of same-named locals in different but
-  // same-named source files that were compiled in their respective directories
-  // (so the source file name and resulting GUID is the same). Find the one
-  // in this module.
- auto Summary = ImportIndex.findSummaryInModule( - SGV->getGUID(), SGV->getParent()->getModuleIdentifier()); - assert(Summary && "Missing summary for global value when exporting"); - auto Linkage = Summary->linkage(); - if (!GlobalValue::isLocalLinkage(Linkage)) { - assert(!isNonRenamableLocal(*SGV) && - "Attempting to promote non-renamable local"); - return true; - } - - return false; -} - -#ifndef NDEBUG -bool FunctionImportGlobalProcessing::isNonRenamableLocal( - const GlobalValue &GV) const { - if (!GV.hasLocalLinkage()) - return false; - // This needs to stay in sync with the logic in buildModuleSummaryIndex. - if (GV.hasSection()) - return true; - if (Used.count(const_cast<GlobalValue *>(&GV))) - return true; - return false; -} -#endif - -std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV, - bool DoPromote) { - // For locals that must be promoted to global scope, ensure that - // the promoted name uniquely identifies the copy in the original module, - // using the ID assigned during combined index creation. When importing, - // we rename all locals (not just those that are promoted) in order to - // avoid naming conflicts between locals imported from different modules. - if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport())) - return ModuleSummaryIndex::getGlobalNameForLocal( - SGV->getName(), - ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier())); - return SGV->getName(); -} - -GlobalValue::LinkageTypes -FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, - bool DoPromote) { - // Any local variable that is referenced by an exported function needs - // to be promoted to global scope. Since we don't currently know which - // functions reference which local variables/functions, we must treat - // all as potentially exported if this module is exporting anything. - if (isModuleExporting()) { - if (SGV->hasLocalLinkage() && DoPromote) - return GlobalValue::ExternalLinkage; - return SGV->getLinkage(); - } - - // Otherwise, if we aren't importing, no linkage change is needed. - if (!isPerformingImport()) - return SGV->getLinkage(); - - switch (SGV->getLinkage()) { - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::ExternalLinkage: - // External and linkonce definitions are converted to available_externally - // definitions upon import, so that they are available for inlining - // and/or optimization, but are turned into declarations later - // during the EliminateAvailableExternally pass. - if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) - return GlobalValue::AvailableExternallyLinkage; - // An imported external declaration stays external. - return SGV->getLinkage(); - - case GlobalValue::AvailableExternallyLinkage: - // An imported available_externally definition converts - // to external if imported as a declaration. - if (!doImportAsDefinition(SGV)) - return GlobalValue::ExternalLinkage; - // An imported available_externally declaration stays that way. - return SGV->getLinkage(); - - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::WeakAnyLinkage: - // Can't import linkonce_any/weak_any definitions correctly, or we might - // change the program semantics, since the linker will pick the first - // linkonce_any/weak_any definition and importing would change the order - // they are seen by the linker. The module linking caller needs to enforce - // this. - assert(!doImportAsDefinition(SGV)); - // If imported as a declaration, it becomes external_weak. 
- return SGV->getLinkage(); - - case GlobalValue::WeakODRLinkage: - // For weak_odr linkage, there is a guarantee that all copies will be - // equivalent, so the issue described above for weak_any does not exist, - // and the definition can be imported. It can be treated similarly - // to an imported externally visible global value. - if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) - return GlobalValue::AvailableExternallyLinkage; - else - return GlobalValue::ExternalLinkage; - - case GlobalValue::AppendingLinkage: - // It would be incorrect to import an appending linkage variable, - // since it would cause global constructors/destructors to be - // executed multiple times. This should have already been handled - // by linkIfNeeded, and we will assert in shouldLinkFromSource - // if we try to import, so we simply return AppendingLinkage. - return GlobalValue::AppendingLinkage; - - case GlobalValue::InternalLinkage: - case GlobalValue::PrivateLinkage: - // If we are promoting the local to global scope, it is handled - // similarly to a normal externally visible global. - if (DoPromote) { - if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) - return GlobalValue::AvailableExternallyLinkage; - else - return GlobalValue::ExternalLinkage; - } - // A non-promoted imported local definition stays local. - // The ThinLTO pass will eventually force-import their definitions. - return SGV->getLinkage(); - - case GlobalValue::ExternalWeakLinkage: - // External weak doesn't apply to definitions, must be a declaration. - assert(!doImportAsDefinition(SGV)); - // Linkage stays external_weak. - return SGV->getLinkage(); - - case GlobalValue::CommonLinkage: - // Linkage stays common on definitions. - // The ThinLTO pass will eventually force-import their definitions. - return SGV->getLinkage(); - } - - llvm_unreachable("unknown linkage type"); -} - -void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { - - ValueInfo VI; - if (GV.hasName()) { - VI = ImportIndex.getValueInfo(GV.getGUID()); - // Set synthetic function entry counts. - if (VI && ImportIndex.hasSyntheticEntryCounts()) { - if (Function *F = dyn_cast<Function>(&GV)) { - if (!F->isDeclaration()) { - for (auto &S : VI.getSummaryList()) { - FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject()); - if (FS->modulePath() == M.getModuleIdentifier()) { - F->setEntryCount(Function::ProfileCount(FS->entryCount(), - Function::PCT_Synthetic)); - break; - } - } - } - } - } - // Check the summaries to see if the symbol gets resolved to a known local - // definition. - if (VI && VI.isDSOLocal()) { - GV.setDSOLocal(true); - if (GV.hasDLLImportStorageClass()) - GV.setDLLStorageClass(GlobalValue::DefaultStorageClass); - } - } - - // Mark read/write-only variables which can be imported with specific - // attribute. We can't internalize them now because IRMover will fail - // to link variable definitions to their external declarations during - // ThinLTO import. We'll internalize read-only variables later, after - // import is finished. See internalizeGVsAfterImport. - // - // If global value dead stripping is not enabled in summary then - // propagateConstants hasn't been run. We can't internalize GV - // in such case. - if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) { - const auto &SL = VI.getSummaryList(); - auto *GVS = SL.empty() ? 
nullptr : dyn_cast<GlobalVarSummary>(SL[0].get()); - // At this stage "maybe" is "definitely" - if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly())) - cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize"); - } - - bool DoPromote = false; - if (GV.hasLocalLinkage() && - ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { - // Save the original name string before we rename GV below. - auto Name = GV.getName().str(); - // Once we change the name or linkage it is difficult to determine - // again whether we should promote since shouldPromoteLocalToGlobal needs - // to locate the summary (based on GUID from name and linkage). Therefore, - // use DoPromote result saved above. - GV.setName(getName(&GV, DoPromote)); - GV.setLinkage(getLinkage(&GV, DoPromote)); - if (!GV.hasLocalLinkage()) - GV.setVisibility(GlobalValue::HiddenVisibility); - - // If we are renaming a COMDAT leader, ensure that we record the COMDAT - // for later renaming as well. This is required for COFF. - if (const auto *C = GV.getComdat()) - if (C->getName() == Name) - RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName())); - } else - GV.setLinkage(getLinkage(&GV, /* DoPromote */ false)); - - // Remove functions imported as available externally defs from comdats, - // as this is a declaration for the linker, and will be dropped eventually. - // It is illegal for comdats to contain declarations. - auto *GO = dyn_cast<GlobalObject>(&GV); - if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { - // The IRMover should not have placed any imported declarations in - // a comdat, so the only declaration that should be in a comdat - // at this point would be a definition imported as available_externally. - assert(GO->hasAvailableExternallyLinkage() && - "Expected comdat on definition (possibly available external)"); - GO->setComdat(nullptr); - } -} - -void FunctionImportGlobalProcessing::processGlobalsForThinLTO() { - for (GlobalVariable &GV : M.globals()) - processGlobalForThinLTO(GV); - for (Function &SF : M) - processGlobalForThinLTO(SF); - for (GlobalAlias &GA : M.aliases()) - processGlobalForThinLTO(GA); - - // Replace any COMDATS that required renaming (because the COMDAT leader was - // promoted and renamed). - if (!RenamedComdats.empty()) - for (auto &GO : M.global_objects()) - if (auto *C = GO.getComdat()) { - auto Replacement = RenamedComdats.find(C); - if (Replacement != RenamedComdats.end()) - GO.setComdat(Replacement->second); - } -} - -bool FunctionImportGlobalProcessing::run() { - processGlobalsForThinLTO(); - return false; -} - -bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, - SetVector<GlobalValue *> *GlobalsToImport) { - FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport); - return ThinLTOProcessing.run(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp deleted file mode 100644 index a2942869130d..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ /dev/null @@ -1,194 +0,0 @@ -//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
-#include <algorithm>
-#include <cassert>
-
-using namespace llvm;
-
-/// Return the stronger of the two orderings. If the two orderings are acquire
-/// and release, then return AcquireRelease.
-///
-static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
-  if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
-      (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
-    return AtomicOrdering::AcquireRelease;
-  return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
-}
-
-/// It is safe to destroy a constant iff it is itself only used by constants.
-/// Note that constants cannot be cyclic, so this test is pretty easy to
-/// implement recursively.
-///
-bool llvm::isSafeToDestroyConstant(const Constant *C) {
-  if (isa<GlobalValue>(C))
-    return false;
-
-  if (isa<ConstantData>(C))
-    return false;
-
-  for (const User *U : C->users())
-    if (const Constant *CU = dyn_cast<Constant>(U)) {
-      if (!isSafeToDestroyConstant(CU))
-        return false;
-    } else
-      return false;
-  return true;
-}
-
-static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
-                             SmallPtrSetImpl<const Value *> &VisitedUsers) {
-  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    if (GV->isExternallyInitialized())
-      GS.StoredType = GlobalStatus::StoredOnce;
-
-  for (const Use &U : V->uses()) {
-    const User *UR = U.getUser();
-    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
-      GS.HasNonInstructionUser = true;
-
-      // If the result of the constantexpr isn't pointer type, then we won't
-      // know to expect it in various places. Just reject early.
-      if (!isa<PointerType>(CE->getType()))
-        return true;
-
-      // FIXME: Do we need to add constexpr selects to VisitedUsers?
-      if (analyzeGlobalAux(CE, GS, VisitedUsers))
-        return true;
-    } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
-      if (!GS.HasMultipleAccessingFunctions) {
-        const Function *F = I->getParent()->getParent();
-        if (!GS.AccessingFunction)
-          GS.AccessingFunction = F;
-        else if (GS.AccessingFunction != F)
-          GS.HasMultipleAccessingFunctions = true;
-      }
-      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        GS.IsLoaded = true;
-        // Don't hack on volatile loads.
-        if (LI->isVolatile())
-          return true;
-        GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
-      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        // Don't allow a store OF the address, only stores TO the address.
-        if (SI->getOperand(0) == V)
-          return true;
-
-        // Don't hack on volatile stores.
-        if (SI->isVolatile())
-          return true;
-
-        GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
-
-        // If this is a direct store to the global (i.e., the global is a
-        // scalar value, not an aggregate), keep more specific information
-        // about stores.
- if (GS.StoredType != GlobalStatus::Stored) { - if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>(SI->getOperand(1))) { - Value *StoredVal = SI->getOperand(0); - - if (Constant *C = dyn_cast<Constant>(StoredVal)) { - if (C->isThreadDependent()) { - // The stored value changes between threads; don't track it. - return true; - } - } - - if (GV->hasInitializer() && StoredVal == GV->getInitializer()) { - if (GS.StoredType < GlobalStatus::InitializerStored) - GS.StoredType = GlobalStatus::InitializerStored; - } else if (isa<LoadInst>(StoredVal) && - cast<LoadInst>(StoredVal)->getOperand(0) == GV) { - if (GS.StoredType < GlobalStatus::InitializerStored) - GS.StoredType = GlobalStatus::InitializerStored; - } else if (GS.StoredType < GlobalStatus::StoredOnce) { - GS.StoredType = GlobalStatus::StoredOnce; - GS.StoredOnceValue = StoredVal; - } else if (GS.StoredType == GlobalStatus::StoredOnce && - GS.StoredOnceValue == StoredVal) { - // noop. - } else { - GS.StoredType = GlobalStatus::Stored; - } - } else { - GS.StoredType = GlobalStatus::Stored; - } - } - } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { - // Skip over bitcasts and GEPs; we don't care about the type or offset - // of the pointer. - if (analyzeGlobalAux(I, GS, VisitedUsers)) - return true; - } else if (isa<SelectInst>(I) || isa<PHINode>(I)) { - // Look through selects and PHIs to find if the pointer is - // conditionally accessed. Make sure we only visit an instruction - // once; otherwise, we can get infinite recursion or exponential - // compile time. - if (VisitedUsers.insert(I).second) - if (analyzeGlobalAux(I, GS, VisitedUsers)) - return true; - } else if (isa<CmpInst>(I)) { - GS.IsCompared = true; - } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { - if (MTI->isVolatile()) - return true; - if (MTI->getArgOperand(0) == V) - GS.StoredType = GlobalStatus::Stored; - if (MTI->getArgOperand(1) == V) - GS.IsLoaded = true; - } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { - assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); - if (MSI->isVolatile()) - return true; - GS.StoredType = GlobalStatus::Stored; - } else if (auto C = ImmutableCallSite(I)) { - if (!C.isCallee(&U)) - return true; - GS.IsLoaded = true; - } else { - return true; // Any other non-load instruction might take address! - } - } else if (const Constant *C = dyn_cast<Constant>(UR)) { - GS.HasNonInstructionUser = true; - // We might have a dead and dangling constant hanging off of here. - if (!isSafeToDestroyConstant(C)) - return true; - } else { - GS.HasNonInstructionUser = true; - // Otherwise must be some other user. - return true; - } - } - - return false; -} - -GlobalStatus::GlobalStatus() = default; - -bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { - SmallPtrSet<const Value *, 16> VisitedUsers; - return analyzeGlobalAux(V, GS, VisitedUsers); -} diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp deleted file mode 100644 index 34c32d9c0c98..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp +++ /dev/null @@ -1,63 +0,0 @@ -//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Utils that are used to perform transformations related to guards and their -// conditions. -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/GuardUtils.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -static cl::opt<uint32_t> PredicatePassBranchWeight( - "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20), - cl::desc("The probability of a guard failing is assumed to be the " - "reciprocal of this value (default = 1 << 20)")); - -void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, - CallInst *Guard) { - OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt)); - SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end()); - - auto *CheckBB = Guard->getParent(); - auto *DeoptBlockTerm = - SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true); - - auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator()); - - // SplitBlockAndInsertIfThen inserts control flow that branches to - // DeoptBlockTerm if the condition is true. We want the opposite. - CheckBI->swapSuccessors(); - - CheckBI->getSuccessor(0)->setName("guarded"); - CheckBI->getSuccessor(1)->setName("deopt"); - - if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit)) - CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD); - - MDBuilder MDB(Guard->getContext()); - CheckBI->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(PredicatePassBranchWeight, 1)); - - IRBuilder<> B(DeoptBlockTerm); - auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, ""); - - if (DeoptIntrinsic->getReturnType()->isVoidTy()) { - B.CreateRetVoid(); - } else { - DeoptCall->setName("deoptcall"); - B.CreateRet(DeoptCall); - } - - DeoptCall->setCallingConv(Guard->getCallingConv()); - DeoptBlockTerm->eraseFromParent(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp deleted file mode 100644 index 8041e66e6c4c..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ /dev/null @@ -1,202 +0,0 @@ -//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Generating inliner statistics for imported functions, mostly useful for -// ThinLTO. 
-//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <iomanip> -#include <sstream> -using namespace llvm; - -ImportedFunctionsInliningStatistics::InlineGraphNode & -ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { - - auto &ValueLookup = NodesMap[F.getName()]; - if (!ValueLookup) { - ValueLookup = llvm::make_unique<InlineGraphNode>(); - ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr; - } - return *ValueLookup; -} - -void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, - const Function &Callee) { - - InlineGraphNode &CallerNode = createInlineGraphNode(Caller); - InlineGraphNode &CalleeNode = createInlineGraphNode(Callee); - CalleeNode.NumberOfInlines++; - - if (!CallerNode.Imported && !CalleeNode.Imported) { - // Direct inline from a non-imported callee to a non-imported caller, so we - // don't have to add this to the graph. This is very helpful when gathering - // inliner statistics in a compile step with no imported functions, where - // the graph would otherwise be empty. - CalleeNode.NumberOfRealInlines++; - return; - } - - CallerNode.InlinedCallees.push_back(&CalleeNode); - if (!CallerNode.Imported) { - // We could avoid a second lookup, but it would make the code ultra ugly. - auto It = NodesMap.find(Caller.getName()); - assert(It != NodesMap.end() && "The node should be already there."); - // Save Caller as a starting node for traversal. The string has to be one - // from the map because Caller can disappear (and its name with it).
- NonImportedCallers.push_back(It->first()); - } -} - -void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { - ModuleName = M.getName(); - for (const auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - AllFunctions++; - ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); - } -} -static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, - const char *PercentageOfMsg, - bool LineEnd = true) { - double Result = 0; - if (All != 0) - Result = 100 * static_cast<double>(Fraction) / All; - - std::stringstream Str; - Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result - << "% of " << PercentageOfMsg << "]"; - if (LineEnd) - Str << "\n"; - return Str.str(); -} - -void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { - calculateRealInlines(); - NonImportedCallers.clear(); - - int32_t InlinedImportedFunctionsCount = 0; - int32_t InlinedNotImportedFunctionsCount = 0; - - int32_t InlinedImportedFunctionsToImportingModuleCount = 0; - int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0; - - const auto SortedNodes = getSortedNodes(); - std::string Out; - Out.reserve(5000); - raw_string_ostream Ostream(Out); - - Ostream << "------- Dumping inliner stats for [" << ModuleName - << "] -------\n"; - - if (Verbose) - Ostream << "-- List of inlined functions:\n"; - - for (const auto &Node : SortedNodes) { - assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines); - if (Node->second->NumberOfInlines == 0) - continue; - - if (Node->second->Imported) { - InlinedImportedFunctionsCount++; - InlinedImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } else { - InlinedNotImportedFunctionsCount++; - InlinedNotImportedFunctionsToImportingModuleCount += - int(Node->second->NumberOfRealInlines > 0); - } - - if (Verbose) - Ostream << "Inlined " - << (Node->second->Imported ? 
"imported " : "not imported ") - << "function [" << Node->first() << "]" - << ": #inlines = " << Node->second->NumberOfInlines - << ", #inlines_to_importing_module = " - << Node->second->NumberOfRealInlines << "\n"; - } - - auto InlinedFunctionsCount = - InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount; - auto NotImportedFuncCount = AllFunctions - ImportedFunctions; - auto ImportedNotInlinedIntoModule = - ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount; - - Ostream << "-- Summary:\n" - << "All functions: " << AllFunctions - << ", imported functions: " << ImportedFunctions << "\n" - << getStatString("inlined functions", InlinedFunctionsCount, - AllFunctions, "all functions") - << getStatString("imported functions inlined anywhere", - InlinedImportedFunctionsCount, ImportedFunctions, - "imported functions") - << getStatString("imported functions inlined into importing module", - InlinedImportedFunctionsToImportingModuleCount, - ImportedFunctions, "imported functions", - /*LineEnd=*/false) - << getStatString(", remaining", ImportedNotInlinedIntoModule, - ImportedFunctions, "imported functions") - << getStatString("non-imported functions inlined anywhere", - InlinedNotImportedFunctionsCount, - NotImportedFuncCount, "non-imported functions") - << getStatString( - "non-imported functions inlined into importing module", - InlinedNotImportedFunctionsToImportingModuleCount, - NotImportedFuncCount, "non-imported functions"); - Ostream.flush(); - dbgs() << Out; -} - -void ImportedFunctionsInliningStatistics::calculateRealInlines() { - // Removing duplicated Callers. - llvm::sort(NonImportedCallers); - NonImportedCallers.erase( - std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), - NonImportedCallers.end()); - - for (const auto &Name : NonImportedCallers) { - auto &Node = *NodesMap[Name]; - if (!Node.Visited) - dfs(Node); - } -} - -void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) { - assert(!GraphNode.Visited); - GraphNode.Visited = true; - for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) { - InlinedFunctionNode->NumberOfRealInlines++; - if (!InlinedFunctionNode->Visited) - dfs(*InlinedFunctionNode); - } -} - -ImportedFunctionsInliningStatistics::SortedNodesTy -ImportedFunctionsInliningStatistics::getSortedNodes() { - SortedNodesTy SortedNodes; - SortedNodes.reserve(NodesMap.size()); - for (const NodesMapTy::value_type& Node : NodesMap) - SortedNodes.push_back(&Node); - - llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs, - const SortedNodesTy::value_type &Rhs) { - if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) - return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; - if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) - return Lhs->second->NumberOfRealInlines > - Rhs->second->NumberOfRealInlines; - return Lhs->first() < Rhs->first(); - }); - return SortedNodes; -} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp deleted file mode 100644 index a7f0f7ac5d61..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ /dev/null @@ -1,2417 +0,0 @@ -//===- InlineFunction.cpp - Code to perform function inlining -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements inlining of a function into a call site, resolving -// parameters and the return value as appropriate. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <iterator> -#include <limits> -#include <string> -#include <utility> -#include <vector> - -using namespace llvm; -using ProfileCount = Function::ProfileCount; - -static cl::opt<bool> -EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), - cl::Hidden, - cl::desc("Convert noalias attributes to metadata during inlining.")); - -static cl::opt<bool> -PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", - cl::init(true), cl::Hidden, - cl::desc("Convert align attributes to assumptions during inlining.")); - -llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, - bool InsertLifetime) { - return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime); -} - -namespace { - - /// A class for recording information about inlining a landing pad. - class LandingPadInliningInfo { - /// Destination of the invoke's unwind. - BasicBlock *OuterResumeDest; - - /// Destination for the callee's resume. - BasicBlock *InnerResumeDest = nullptr; - - /// LandingPadInst associated with the invoke. - LandingPadInst *CallerLPad = nullptr; - - /// PHI for EH values from landingpad insts. 
- PHINode *InnerEHValuesPHI = nullptr; - - SmallVector<Value*, 8> UnwindDestPHIValues; - - public: - LandingPadInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()) { - // If there are PHI nodes in the unwind destination block, we need to keep - // track of which values came into them from the invoke before removing - // the edge from this block. - BasicBlock *InvokeBB = II->getParent(); - BasicBlock::iterator I = OuterResumeDest->begin(); - for (; isa<PHINode>(I); ++I) { - // Save the value to use for this edge. - PHINode *PHI = cast<PHINode>(I); - UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); - } - - CallerLPad = cast<LandingPadInst>(I); - } - - /// The outer unwind destination is the target of - /// unwind edges introduced for calls within the inlined function. - BasicBlock *getOuterResumeDest() const { - return OuterResumeDest; - } - - BasicBlock *getInnerResumeDest(); - - LandingPadInst *getLandingPadInst() const { return CallerLPad; } - - /// Forward the 'resume' instruction to the caller's landing pad block. - /// When the landing pad block has only one predecessor, this is - /// a simple branch. When there is more than one predecessor, we need to - /// split the landing pad block after the landingpad instruction and jump - /// to there. - void forwardResume(ResumeInst *RI, - SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); - - /// Add incoming-PHI values to the unwind destination block for the given - /// basic block, using the values for the original invoke's source block. - void addIncomingPHIValuesFor(BasicBlock *BB) const { - addIncomingPHIValuesForInto(BB, OuterResumeDest); - } - - void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { - BasicBlock::iterator I = dest->begin(); - for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { - PHINode *phi = cast<PHINode>(I); - phi->addIncoming(UnwindDestPHIValues[i], src); - } - } - }; - -} // end anonymous namespace - -/// Get or create a target for the branch from ResumeInsts. -BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { - if (InnerResumeDest) return InnerResumeDest; - - // Split the landing pad. - BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); - InnerResumeDest = - OuterResumeDest->splitBasicBlock(SplitPoint, - OuterResumeDest->getName() + ".body"); - - // The number of incoming edges we expect to the inner landing pad. - const unsigned PHICapacity = 2; - - // Create corresponding new PHIs for all the PHIs in the outer landing pad. - Instruction *InsertPoint = &InnerResumeDest->front(); - BasicBlock::iterator I = OuterResumeDest->begin(); - for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { - PHINode *OuterPHI = cast<PHINode>(I); - PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, - OuterPHI->getName() + ".lpad-body", - InsertPoint); - OuterPHI->replaceAllUsesWith(InnerPHI); - InnerPHI->addIncoming(OuterPHI, OuterResumeDest); - } - - // Create a PHI for the exception values. - InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, - "eh.lpad-body", InsertPoint); - CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); - InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); - - // All done. - return InnerResumeDest; -} - -/// Forward the 'resume' instruction to the caller's landing pad block. -/// When the landing pad block has only one predecessor, this is a simple -/// branch. 
When there is more than one predecessor, we need to split the -/// landing pad block after the landingpad instruction and jump to there. -void LandingPadInliningInfo::forwardResume( - ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { - BasicBlock *Dest = getInnerResumeDest(); - BasicBlock *Src = RI->getParent(); - - BranchInst::Create(Dest, Src); - - // Update the PHIs in the destination. They were inserted in an order which - // makes this work. - addIncomingPHIValuesForInto(Src, Dest); - - InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); - RI->eraseFromParent(); -} - -/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. -static Value *getParentPad(Value *EHPad) { - if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) - return FPI->getParentPad(); - return cast<CatchSwitchInst>(EHPad)->getParentPad(); -} - -using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; - -/// Helper for getUnwindDestToken that does the descendant-ward part of -/// the search. -static Value *getUnwindDestTokenHelper(Instruction *EHPad, - UnwindDestMemoTy &MemoMap) { - SmallVector<Instruction *, 8> Worklist(1, EHPad); - - while (!Worklist.empty()) { - Instruction *CurrentPad = Worklist.pop_back_val(); - // We only put pads on the worklist that aren't in the MemoMap. When - // we find an unwind dest for a pad we may update its ancestors, but - // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, - // so they should never get updated while queued on the worklist. - assert(!MemoMap.count(CurrentPad)); - Value *UnwindDestToken = nullptr; - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { - if (CatchSwitch->hasUnwindDest()) { - UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); - } else { - // Catchswitch doesn't have a 'nounwind' variant, and one might be - // annotated as "unwinds to caller" when really it's nounwind (see - // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the - // parent's unwind dest from this. We can check its catchpads' - // descendants, since they might include a cleanuppad with an - // "unwinds to caller" cleanupret, which can be trusted. - for (auto HI = CatchSwitch->handler_begin(), - HE = CatchSwitch->handler_end(); - HI != HE && !UnwindDestToken; ++HI) { - BasicBlock *HandlerBlock = *HI; - auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); - for (User *Child : CatchPad->users()) { - // Intentionally ignore invokes here -- since the catchswitch is - // marked "unwind to caller", it would be a verifier error if it - // contained an invoke which unwinds out of it, so any invoke we'd - // encounter must unwind to some child of the catch. - if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) - continue; - - Instruction *ChildPad = cast<Instruction>(Child); - auto Memo = MemoMap.find(ChildPad); - if (Memo == MemoMap.end()) { - // Haven't figured out this child pad yet; queue it. - Worklist.push_back(ChildPad); - continue; - } - // We've already checked this child, but might have found that - // it offers no proof either way. - Value *ChildUnwindDestToken = Memo->second; - if (!ChildUnwindDestToken) - continue; - // We already know the child's unwind dest, which can either - // be ConstantTokenNone to indicate unwind to caller, or can - // be another child of the catchpad. Only the former indicates - // the unwind dest of the catchswitch. 
- if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { - UnwindDestToken = ChildUnwindDestToken; - break; - } - assert(getParentPad(ChildUnwindDestToken) == CatchPad); - } - } - } - } else { - auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); - for (User *U : CleanupPad->users()) { - if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { - if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) - UnwindDestToken = RetUnwindDest->getFirstNonPHI(); - else - UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); - break; - } - Value *ChildUnwindDestToken; - if (auto *Invoke = dyn_cast<InvokeInst>(U)) { - ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); - } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { - Instruction *ChildPad = cast<Instruction>(U); - auto Memo = MemoMap.find(ChildPad); - if (Memo == MemoMap.end()) { - // Haven't resolved this child yet; queue it and keep searching. - Worklist.push_back(ChildPad); - continue; - } - // We've checked this child, but still need to ignore it if it - // had no proof either way. - ChildUnwindDestToken = Memo->second; - if (!ChildUnwindDestToken) - continue; - } else { - // Not a relevant user of the cleanuppad - continue; - } - // In a well-formed program, the child/invoke must either unwind to - // an(other) child of the cleanup, or exit the cleanup. In the - // first case, continue searching. - if (isa<Instruction>(ChildUnwindDestToken) && - getParentPad(ChildUnwindDestToken) == CleanupPad) - continue; - UnwindDestToken = ChildUnwindDestToken; - break; - } - } - // If we haven't found an unwind dest for CurrentPad, we may have queued its - // children, so move on to the next in the worklist. - if (!UnwindDestToken) - continue; - - // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits - // any ancestors of CurrentPad up to but not including UnwindDestToken's - // parent pad. Record this in the memo map, and check to see if the - // original EHPad being queried is one of the ones exited. - Value *UnwindParent; - if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) - UnwindParent = getParentPad(UnwindPad); - else - UnwindParent = nullptr; - bool ExitedOriginalPad = false; - for (Instruction *ExitedPad = CurrentPad; - ExitedPad && ExitedPad != UnwindParent; - ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { - // Skip over catchpads since they just follow their catchswitches. - if (isa<CatchPadInst>(ExitedPad)) - continue; - MemoMap[ExitedPad] = UnwindDestToken; - ExitedOriginalPad |= (ExitedPad == EHPad); - } - - if (ExitedOriginalPad) - return UnwindDestToken; - - // Continue the search. - } - - // No definitive information is contained within this funclet. - return nullptr; -} - -/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, -/// return that pad instruction. If it unwinds to caller, return -/// ConstantTokenNone. If it does not have a definitive unwind destination, -/// return nullptr. -/// -/// This routine gets invoked for calls in funclets in inlinees when inlining -/// an invoke. Since many funclets don't have calls inside them, it's queried -/// on-demand rather than building a map of pads to unwind dests up front. -/// Determining a funclet's unwind dest may require recursively searching its -/// descendants, and also ancestors and cousins if the descendants don't provide -/// an answer. 
Since most funclets will have their unwind dest immediately -/// available as the unwind dest of a catchswitch or cleanupret, this routine -/// searches top-down from the given pad and then up. To avoid worst-case -/// quadratic run-time given that approach, it uses a memo map to avoid -/// re-processing funclet trees. The callers that rewrite the IR as they go -/// take advantage of this, for correctness, by checking/forcing rewritten -/// pads' entries to match the original callee view. -static Value *getUnwindDestToken(Instruction *EHPad, - UnwindDestMemoTy &MemoMap) { - // Catchpads unwind to the same place as their catchswitch; - // redirect any queries on catchpads so the code below can - // deal with just catchswitches and cleanuppads. - if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) - EHPad = CPI->getCatchSwitch(); - - // Check if we've already determined the unwind dest for this pad. - auto Memo = MemoMap.find(EHPad); - if (Memo != MemoMap.end()) - return Memo->second; - - // Search EHPad and, if necessary, its descendants. - Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); - assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); - if (UnwindDestToken) - return UnwindDestToken; - - // No information is available for this EHPad from itself or any of its - // descendants. An unwind all the way out to a pad in the caller would - // also need to agree with the unwind dest of the parent funclet, so - // search up the chain to try to find a funclet with information. Put - // null entries in the memo map to avoid re-processing as we go up. - MemoMap[EHPad] = nullptr; -#ifndef NDEBUG - SmallPtrSet<Instruction *, 4> TempMemos; - TempMemos.insert(EHPad); -#endif - Instruction *LastUselessPad = EHPad; - Value *AncestorToken; - for (AncestorToken = getParentPad(EHPad); - auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); - AncestorToken = getParentPad(AncestorToken)) { - // Skip over catchpads since they just follow their catchswitches. - if (isa<CatchPadInst>(AncestorPad)) - continue; - // If the MemoMap had an entry mapping AncestorPad to nullptr, since we - // haven't yet called getUnwindDestTokenHelper for AncestorPad in this - // call to getUnwindDestToken, that would mean that AncestorPad had no - // information in itself, its descendants, or its ancestors. If that - // were the case, then we should also have recorded the lack of information - // for the descendant that we're coming from. So assert that we don't - // find a null entry in the MemoMap for AncestorPad. - assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); - auto AncestorMemo = MemoMap.find(AncestorPad); - if (AncestorMemo == MemoMap.end()) { - UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); - } else { - UnwindDestToken = AncestorMemo->second; - } - if (UnwindDestToken) - break; - LastUselessPad = AncestorPad; - MemoMap[LastUselessPad] = nullptr; -#ifndef NDEBUG - TempMemos.insert(LastUselessPad); -#endif - } - - // We know that getUnwindDestTokenHelper was called on LastUselessPad and - // returned nullptr (and likewise for EHPad and any of its ancestors up to - // LastUselessPad), so LastUselessPad has no information from below.
Since - // getUnwindDestTokenHelper must investigate all downward paths through - // no-information nodes to prove that a node has no information like this, - // and since any time it finds information it records it in the MemoMap for - // not just the immediately-containing funclet but also any ancestors also - // exited, it must be the case that, walking downward from LastUselessPad, - // visiting just those nodes which have not been mapped to an unwind dest - // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since - // they are just used to keep getUnwindDestTokenHelper from repeating work), - // any node visited must have been exhaustively searched with no information - // for it found. - SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); - while (!Worklist.empty()) { - Instruction *UselessPad = Worklist.pop_back_val(); - auto Memo = MemoMap.find(UselessPad); - if (Memo != MemoMap.end() && Memo->second) { - // Here the name 'UselessPad' is a bit of a misnomer, because we've found - // that it is a funclet that does have information about unwinding to - // a particular destination; its parent was a useless pad. - // Since its parent has no information, the unwind edge must not escape - // the parent, and must target a sibling of this pad. This local unwind - // gives us no information about EHPad. Leave it and the subtree rooted - // at it alone. - assert(getParentPad(Memo->second) == getParentPad(UselessPad)); - continue; - } - // We know we don't have information for UselessPad. If it has an entry in - // the MemoMap (mapping it to nullptr), it must be one of the TempMemos - // added on this invocation of getUnwindDestToken; if a previous invocation - // recorded nullptr, it would have had to prove that the ancestors of - // UselessPad, which include LastUselessPad, had no information, and that - // in turn would have required proving that the descendants of - // LastUselessPad, which include EHPad, have no information about - // LastUselessPad, which would imply that EHPad was mapped to nullptr in - // the MemoMap on that invocation, which isn't the case if we got here. - assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); - // Assert as we enumerate users that 'UselessPad' doesn't have any unwind - // information that we'd be contradicting by making a map entry for it - // (which is something that getUnwindDestTokenHelper must have proved for - // us to get here). Just assert on its direct users here; the checks in - // this downward walk at its descendants will verify that they don't have - // any unwind edges that exit 'UselessPad' either (i.e. they either have no - // unwind edges or unwind to a sibling).
- MemoMap[UselessPad] = UnwindDestToken; - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { - assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad"); - for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { - auto *CatchPad = HandlerBlock->getFirstNonPHI(); - for (User *U : CatchPad->users()) { - assert( - (!isa<InvokeInst>(U) || - (getParentPad( - cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == - CatchPad)) && - "Expected useless pad"); - if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) - Worklist.push_back(cast<Instruction>(U)); - } - } - } else { - assert(isa<CleanupPadInst>(UselessPad)); - for (User *U : UselessPad->users()) { - assert(!isa<CleanupReturnInst>(U) && "Expected useless pad"); - assert((!isa<InvokeInst>(U) || - (getParentPad( - cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) == - UselessPad)) && - "Expected useless pad"); - if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) - Worklist.push_back(cast<Instruction>(U)); - } - } - } - - return UnwindDestToken; -} - -/// When we inline a basic block into an invoke, -/// we have to turn all of the calls that can throw into invokes. -/// This function analyzes BB to see if there are any calls, and if so, -/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI -/// nodes in that block with the values specified in InvokeDestPHIValues. -static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( - BasicBlock *BB, BasicBlock *UnwindEdge, - UnwindDestMemoTy *FuncletUnwindMap = nullptr) { - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *I = &*BBI++; - - // We only need to check for function calls: inlined invoke - // instructions require no special handling. - CallInst *CI = dyn_cast<CallInst>(I); - - if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) - continue; - - // We do not need to (and in fact, cannot) convert possibly throwing calls - // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into - // invokes. The caller's "segment" of the deoptimization continuation - // attached to the newly inlined @llvm.experimental_deoptimize - // (resp. @llvm.experimental.guard) call should contain the exception - // handling logic, if any. - if (auto *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || - F->getIntrinsicID() == Intrinsic::experimental_guard) - continue; - - if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { - // This call is nested inside a funclet. If that funclet has an unwind - // destination within the inlinee, then unwinding out of this call would - // be UB. Rewriting this call to an invoke which targets the inlined - // invoke's unwind dest would give the call's parent funclet multiple - // unwind destinations, which is something that subsequent EH table - // generation can't handle and that the verifier rejects. So when we - // see such a call, leave it as a call.
- auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); - Value *UnwindDestToken = - getUnwindDestToken(FuncletPad, *FuncletUnwindMap); - if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) - continue; -#ifndef NDEBUG - Instruction *MemoKey; - if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) - MemoKey = CatchPad->getCatchSwitch(); - else - MemoKey = FuncletPad; - assert(FuncletUnwindMap->count(MemoKey) && - (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && - "must get memoized to avoid confusing later searches"); -#endif // NDEBUG - } - - changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); - return BB; - } - return nullptr; -} - -/// If we inlined an invoke site, we need to convert calls -/// in the body of the inlined function into invokes. -/// -/// II is the invoke instruction being inlined. FirstNewBlock is the first -/// block of the inlined code (the last block is the end of the function), -/// and InlineCodeInfo is information about the code that got inlined. -static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { - BasicBlock *InvokeDest = II->getUnwindDest(); - - Function *Caller = FirstNewBlock->getParent(); - - // The inlined code is currently at the end of the function, scan from the - // start of the inlined code to its end, checking for stuff we need to - // rewrite. - LandingPadInliningInfo Invoke(II); - - // Get all of the inlined landing pad instructions. - SmallPtrSet<LandingPadInst*, 16> InlinedLPads; - for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); - I != E; ++I) - if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) - InlinedLPads.insert(II->getLandingPadInst()); - - // Append the clauses from the outer landing pad instruction into the inlined - // landing pad instructions. - LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); - for (LandingPadInst *InlinedLPad : InlinedLPads) { - unsigned OuterNum = OuterLPad->getNumClauses(); - InlinedLPad->reserveClauses(OuterNum); - for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) - InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); - if (OuterLPad->isCleanup()) - InlinedLPad->setCleanup(true); - } - - for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); - BB != E; ++BB) { - if (InlinedCodeInfo.ContainsCalls) - if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( - &*BB, Invoke.getOuterResumeDest())) - // Update any PHI nodes in the exceptional block to indicate that there - // is now a new entry in them. - Invoke.addIncomingPHIValuesFor(NewBB); - - // Forward any resumes that are remaining here. - if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI, InlinedLPads); - } - - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); -} - -/// If we inlined an invoke site, we need to convert calls -/// in the body of the inlined function into invokes. -/// -/// II is the invoke instruction being inlined. FirstNewBlock is the first -/// block of the inlined code (the last block is the end of the function), -/// and InlineCodeInfo is information about the code that got inlined. 
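// [Editor's sketch, not part of the original file] The rewrite performed by
// HandleCallsInBlockInlinedThroughInvoke above (via the
// changeToInvokeAndSplitBasicBlock call it makes) is easiest to see on IR.
// Assuming a callee body inlined through "invoke @f ... unwind label %lpad",
// a may-throw call in that body is rewritten roughly as:
//
//   before:  %r = call i32 @may_throw(i32 %x)
//   after:   %r = invoke i32 @may_throw(i32 %x)
//                    to label %split.cont unwind label %lpad
//
// where %split.cont receives the instructions that followed the call and
// %lpad is the unwind destination of the caller's invoke. The block names
// here are illustrative only.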
-static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { - BasicBlock *UnwindDest = II->getUnwindDest(); - Function *Caller = FirstNewBlock->getParent(); - - assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); - - // If there are PHI nodes in the unwind destination block, we need to keep - // track of which values came into them from the invoke before removing the - // edge from this block. - SmallVector<Value *, 8> UnwindDestPHIValues; - BasicBlock *InvokeBB = II->getParent(); - for (Instruction &I : *UnwindDest) { - // Save the value to use for this edge. - PHINode *PHI = dyn_cast<PHINode>(&I); - if (!PHI) - break; - UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); - } - - // Add incoming-PHI values to the unwind destination block for the given basic - // block, using the values for the original invoke's source block. - auto UpdatePHINodes = [&](BasicBlock *Src) { - BasicBlock::iterator I = UnwindDest->begin(); - for (Value *V : UnwindDestPHIValues) { - PHINode *PHI = cast<PHINode>(I); - PHI->addIncoming(V, Src); - ++I; - } - }; - - // This connects all the instructions which 'unwind to caller' to the invoke - // destination. - UnwindDestMemoTy FuncletUnwindMap; - for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); - BB != E; ++BB) { - if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { - if (CRI->unwindsToCaller()) { - auto *CleanupPad = CRI->getCleanupPad(); - CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); - CRI->eraseFromParent(); - UpdatePHINodes(&*BB); - // Finding a cleanupret with an unwind destination would confuse - // subsequent calls to getUnwindDestToken, so map the cleanuppad - // to short-circuit any such calls and recognize this as an "unwind - // to caller" cleanup. - assert(!FuncletUnwindMap.count(CleanupPad) || - isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); - FuncletUnwindMap[CleanupPad] = - ConstantTokenNone::get(Caller->getContext()); - } - } - - Instruction *I = BB->getFirstNonPHI(); - if (!I->isEHPad()) - continue; - - Instruction *Replacement = nullptr; - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { - if (CatchSwitch->unwindsToCaller()) { - Value *UnwindDestToken; - if (auto *ParentPad = - dyn_cast<Instruction>(CatchSwitch->getParentPad())) { - // This catchswitch is nested inside another funclet. If that - // funclet has an unwind destination within the inlinee, then - // unwinding out of this catchswitch would be UB. Rewriting this - // catchswitch to unwind to the inlined invoke's unwind dest would - // give the parent funclet multiple unwind destinations, which is - // something that subsequent EH table generation can't handle and - // that the verifier rejects. So when we see such a call, leave it - // as "unwind to caller". - UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); - if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) - continue; - } else { - // This catchswitch has no parent to inherit constraints from, and - // none of its descendants can have an unwind edge that exits it and - // targets another funclet in the inlinee. It may or may not have a - // descendant that definitively has an unwind to caller. In either - // case, we'll have to assume that any unwinds out of it may need to - // be routed to the caller, so treat it as though it has a definitive - // unwind to caller.
- UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); - } - auto *NewCatchSwitch = CatchSwitchInst::Create( - CatchSwitch->getParentPad(), UnwindDest, - CatchSwitch->getNumHandlers(), CatchSwitch->getName(), - CatchSwitch); - for (BasicBlock *PadBB : CatchSwitch->handlers()) - NewCatchSwitch->addHandler(PadBB); - // Propagate info for the old catchswitch over to the new one in - // the unwind map. This also serves to short-circuit any subsequent - // checks for the unwind dest of this catchswitch, which would get - // confused if they found the outer handler in the callee. - FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; - Replacement = NewCatchSwitch; - } - } else if (!isa<FuncletPadInst>(I)) { - llvm_unreachable("unexpected EHPad!"); - } - - if (Replacement) { - Replacement->takeName(I); - I->replaceAllUsesWith(Replacement); - I->eraseFromParent(); - UpdatePHINodes(&*BB); - } - } - - if (InlinedCodeInfo.ContainsCalls) - for (Function::iterator BB = FirstNewBlock->getIterator(), - E = Caller->end(); - BB != E; ++BB) - if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( - &*BB, UnwindDest, &FuncletUnwindMap)) - // Update any PHI nodes in the exceptional block to indicate that there - // is now a new entry in them. - UpdatePHINodes(NewBB); - - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - UnwindDest->removePredecessor(InvokeBB); -} - -/// When inlining a call site that has !llvm.mem.parallel_loop_access or -/// llvm.access.group metadata, that metadata should be propagated to all -/// memory-accessing cloned instructions. -static void PropagateParallelLoopAccessMetadata(CallSite CS, - ValueToValueMapTy &VMap) { - MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access); - MDNode *CallAccessGroup = - CS.getInstruction()->getMetadata(LLVMContext::MD_access_group); - if (!M && !CallAccessGroup) - return; - - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - if (!VMI->second) - continue; - - Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) - continue; - - if (M) { - if (MDNode *PM = - NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) { - M = MDNode::concatenate(PM, M); - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); - } else if (NI->mayReadOrWriteMemory()) { - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M); - } - } - - if (NI->mayReadOrWriteMemory()) { - MDNode *UnitedAccGroups = uniteAccessGroups( - NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup); - NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups); - } - } -} - -/// When inlining a function that contains noalias scope metadata, -/// this metadata needs to be cloned so that the inlined blocks -/// have different "unique scopes" at every call site. Were this not done, then -/// aliasing scopes from a function inlined into a caller multiple times could -/// not be differentiated (and this would lead to miscompiles because the -/// non-aliasing property communicated by the metadata could have -/// call-site-specific control dependencies). 
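// [Editor's sketch, not part of the original file] A compact illustration of
// the cloning requirement described above. If the callee tags its accesses
// with a scope list !1 = !{!2} and is inlined twice into one caller, each
// inlined copy must end up with a fresh, distinct scope; otherwise "noalias"
// facts proved for one copy would be applied to the other copy's pointers.
// The sketch below shows the uniqueness mechanism using the MDBuilder calls
// that also appear later in this file; the function name is illustrative,
// not the routine the file actually uses (which clones via temporary MDTuple
// nodes and RAUW).
static MDNode *freshScopeForInlinedCopy(LLVMContext &Ctx, StringRef FnName) {
  MDBuilder MDB(Ctx);
  // One anonymous domain per inlined body, one anonymous scope within it;
  // anonymous nodes are always distinct, which is what provides uniqueness.
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain(FnName);
  return MDB.createAnonymousAliasScope(Domain, FnName);
}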
-static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { - const Function *CalledFunc = CS.getCalledFunction(); - SetVector<const MDNode *> MD; - - // Note: We could only clone the metadata if it is already used in the - // caller. I'm omitting that check here because it might confuse - // inter-procedural alias analysis passes. We can revisit this if it becomes - // an efficiency or overhead problem. - - for (const BasicBlock &I : *CalledFunc) - for (const Instruction &J : I) { - if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) - MD.insert(M); - if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) - MD.insert(M); - } - - if (MD.empty()) - return; - - // Walk the existing metadata, adding the complete (perhaps cyclic) chain to - // the set. - SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); - while (!Queue.empty()) { - const MDNode *M = cast<MDNode>(Queue.pop_back_val()); - for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) - if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i))) - if (MD.insert(M1)) - Queue.push_back(M1); - } - - // Now we have a complete set of all metadata in the chains used to specify - // the noalias scopes and the lists of those scopes. - SmallVector<TempMDTuple, 16> DummyNodes; - DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; - for (const MDNode *I : MD) { - DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); - MDMap[I].reset(DummyNodes.back().get()); - } - - // Create new metadata nodes to replace the dummy nodes, replacing old - // metadata references with either a dummy node or an already-created new - // node. - for (const MDNode *I : MD) { - SmallVector<Metadata *, 4> NewOps; - for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { - const Metadata *V = I->getOperand(i); - if (const MDNode *M = dyn_cast<MDNode>(V)) - NewOps.push_back(MDMap[M]); - else - NewOps.push_back(const_cast<Metadata *>(V)); - } - - MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); - MDTuple *TempM = cast<MDTuple>(MDMap[I]); - assert(TempM->isTemporary() && "Expected temporary node"); - - TempM->replaceAllUsesWith(NewM); - } - - // Now replace the metadata in the new inlined instructions with the - // replacements from the map. - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - if (!VMI->second) - continue; - - Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) - continue; - - if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had alias scope metadata (a list of scopes to - // which instructions inside it might belong), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); - } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) - NI->setMetadata(LLVMContext::MD_alias_scope, M); - } - - if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had noalias metadata (a list of scopes with - // which instructions inside it don't alias), propagate those scopes to - // the inlined instructions.
- if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_noalias, NewMD); - } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) - NI->setMetadata(LLVMContext::MD_noalias, M); - } - } -} - -/// If the inlined function has noalias arguments, -/// then add new alias scopes for each noalias argument, tag the mapped noalias -/// parameters with noalias metadata specifying the new scope, and tag all -/// non-derived loads, stores and memory intrinsics with the new alias scopes. -static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout &DL, AAResults *CalleeAAR) { - if (!EnableNoAliasConversion) - return; - - const Function *CalledFunc = CS.getCalledFunction(); - SmallVector<const Argument *, 4> NoAliasArgs; - - for (const Argument &Arg : CalledFunc->args()) - if (Arg.hasNoAliasAttr() && !Arg.use_empty()) - NoAliasArgs.push_back(&Arg); - - if (NoAliasArgs.empty()) - return; - - // To do a good job, if a noalias variable is captured, we need to know if - // the capture point dominates the particular use we're considering. - DominatorTree DT; - DT.recalculate(const_cast<Function&>(*CalledFunc)); - - // noalias indicates that pointer values based on the argument do not alias - // pointer values which are not based on it. So we add a new "scope" for each - // noalias function argument. Accesses using pointers based on that argument - // become part of that alias scope, accesses using pointers not based on that - // argument are tagged as noalias with that scope. - - DenseMap<const Argument *, MDNode *> NewScopes; - MDBuilder MDB(CalledFunc->getContext()); - - // Create a new scope domain for this function. - MDNode *NewDomain = - MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); - for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { - const Argument *A = NoAliasArgs[i]; - - std::string Name = CalledFunc->getName(); - if (A->hasName()) { - Name += ": %"; - Name += A->getName(); - } else { - Name += ": argument "; - Name += utostr(i); - } - - // Note: We always create a new anonymous root here. This is true regardless - // of the linkage of the callee because the aliasing "scope" is not just a - // property of the callee, but also all control dependencies in the caller. - MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - NewScopes.insert(std::make_pair(A, NewScope)); - } - - // Iterate over all new instructions in the map; for all memory-access - // instructions, add the alias scope metadata. 
- for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) { - if (!VMI->second) - continue; - - Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) - continue; - - bool IsArgMemOnlyCall = false, IsFuncCall = false; - SmallVector<const Value *, 2> PtrArgs; - - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - PtrArgs.push_back(LI->getPointerOperand()); - else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) - PtrArgs.push_back(SI->getPointerOperand()); - else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) - PtrArgs.push_back(VAAI->getPointerOperand()); - else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) - PtrArgs.push_back(CXI->getPointerOperand()); - else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) - PtrArgs.push_back(RMWI->getPointerOperand()); - else if (const auto *Call = dyn_cast<CallBase>(I)) { - // If we know that the call does not access memory, then we'll still - // know that about the inlined clone of this call site, and we don't - // need to add metadata. - if (Call->doesNotAccessMemory()) - continue; - - IsFuncCall = true; - if (CalleeAAR) { - FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call); - if (MRB == FMRB_OnlyAccessesArgumentPointees || - MRB == FMRB_OnlyReadsArgumentPointees) - IsArgMemOnlyCall = true; - } - - for (Value *Arg : Call->args()) { - // We need to check the underlying objects of all arguments, not just - // the pointer arguments, because we might be passing pointers as - // integers, etc. - // However, if we know that the call only accesses pointer arguments, - // then we only need to check the pointer arguments. - if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy()) - continue; - - PtrArgs.push_back(Arg); - } - } - - // If we found no pointers, then this instruction is not suitable for - // pairing with an instruction to receive aliasing metadata. - // However, if this is a call, then we might just alias with none of the - // noalias arguments. - if (PtrArgs.empty() && !IsFuncCall) - continue; - - // It is possible that there is only one underlying object, but you - // need to go through several PHIs to see it, and thus could be - // repeated in the Objects list. - SmallPtrSet<const Value *, 4> ObjSet; - SmallVector<Metadata *, 4> Scopes, NoAliases; - - SmallSetVector<const Argument *, 4> NAPtrArgs; - for (const Value *V : PtrArgs) { - SmallVector<const Value *, 4> Objects; - GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr); - - for (const Value *O : Objects) - ObjSet.insert(O); - } - - // Figure out if we're derived from anything that is not a noalias - // argument. - bool CanDeriveViaCapture = false, UsesAliasingPtr = false; - for (const Value *V : ObjSet) { - // Is this value a constant that cannot be derived from any pointer - // value (we need to exclude constant expressions, for example, that - // are formed from arithmetic on global symbols)? - bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) || - isa<ConstantPointerNull>(V) || - isa<ConstantDataVector>(V) || isa<UndefValue>(V); - if (IsNonPtrConst) - continue; - - // If this is anything other than a noalias argument, then we cannot - // completely describe the aliasing properties using alias.scope - // metadata (and, thus, won't add any).
- if (const Argument *A = dyn_cast<Argument>(V)) { - if (!A->hasNoAliasAttr()) - UsesAliasingPtr = true; - } else { - UsesAliasingPtr = true; - } - - // If this is not some identified function-local object (which cannot - // directly alias a noalias argument), or some other argument (which, - // by definition, also cannot alias a noalias argument), then we could - // alias a noalias argument that has been captured. - if (!isa<Argument>(V) && - !isIdentifiedFunctionLocal(const_cast<Value*>(V))) - CanDeriveViaCapture = true; - } - - // A function call can always get captured noalias pointers (via other - // parameters, globals, etc.). - if (IsFuncCall && !IsArgMemOnlyCall) - CanDeriveViaCapture = true; - - // First, we want to figure out all of the sets with which we definitely - // don't alias. Iterate over all noalias sets, and add those for which: - // 1. The noalias argument is not in the set of objects from which we - // definitely derive. - // 2. The noalias argument has not yet been captured. - // An arbitrary function that might load pointers could see captured - // noalias arguments via other noalias arguments or globals, and so we - // must always check for prior capture. - for (const Argument *A : NoAliasArgs) { - if (!ObjSet.count(A) && (!CanDeriveViaCapture || - // It might be tempting to skip the - // PointerMayBeCapturedBefore check if - // A->hasNoCaptureAttr() is true, but this is - // incorrect because nocapture only guarantees - // that no copies outlive the function, not - // that the value cannot be locally captured. - !PointerMayBeCapturedBefore(A, - /* ReturnCaptures */ false, - /* StoreCaptures */ false, I, &DT))) - NoAliases.push_back(NewScopes[A]); - } - - if (!NoAliases.empty()) - NI->setMetadata(LLVMContext::MD_noalias, - MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_noalias), - MDNode::get(CalledFunc->getContext(), NoAliases))); - - // Next, we want to figure out all of the sets to which we might belong. - // We might belong to a set if the noalias argument is in the set of - // underlying objects. If there is some non-noalias argument in our list - // of underlying objects, then we cannot add a scope because the fact - // that some access does not alias with any set of our noalias arguments - // cannot itself guarantee that it does not alias with this access - // (because there is some pointer of unknown origin involved and the - // other access might also depend on this pointer). We also cannot add - // scopes to arbitrary functions unless we know they don't access any - // non-parameter pointer-values. - bool CanAddScopes = !UsesAliasingPtr; - if (CanAddScopes && IsFuncCall) - CanAddScopes = IsArgMemOnlyCall; - - if (CanAddScopes) - for (const Argument *A : NoAliasArgs) { - if (ObjSet.count(A)) - Scopes.push_back(NewScopes[A]); - } - - if (!Scopes.empty()) - NI->setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(CalledFunc->getContext(), Scopes))); - } - } -} - -/// If the inlined function has non-byval align arguments, then -/// add @llvm.assume-based alignment assumptions to preserve this information.
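// [Editor's sketch, not part of the original file] What the routine below
// materializes, assuming a pointer parameter declared "align 16". The
// IRBuilder::CreateAlignmentAssumption call used below expands, in this
// version of LLVM, to roughly:
//
//   %ptrint    = ptrtoint i8* %arg to i64
//   %maskedptr = and i64 %ptrint, 15
//   %maskcond  = icmp eq i64 %maskedptr, 0
//   call void @llvm.assume(i1 %maskcond)
//
// so later passes can still rely on the alignment even though the attribute
// on the callee's parameter disappears once the call site is inlined away.
// (Value names above are illustrative.)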
-static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) - return; - - AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller()); - auto &DL = CS.getCaller()->getParent()->getDataLayout(); - - // To avoid inserting redundant assumptions, we should check for assumptions - // already in the caller. To do this, we might need a DT of the caller. - DominatorTree DT; - bool DTCalculated = false; - - Function *CalledFunc = CS.getCalledFunction(); - for (Argument &Arg : CalledFunc->args()) { - unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; - if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { - if (!DTCalculated) { - DT.recalculate(*CS.getCaller()); - DTCalculated = true; - } - - // If we can already prove the asserted alignment in the context of the - // caller, then don't bother inserting the assumption. - Value *ArgVal = CS.getArgument(Arg.getArgNo()); - if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) - continue; - - CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, ArgVal, Align); - AC->registerAssumption(NewAsmp); - } - } -} - -/// Once we have cloned code over from a callee into the caller, -/// update the specified callgraph to reflect the changes we made. -/// Note that it's possible that not all code was copied over, so only -/// some edges of the callgraph may remain. -static void UpdateCallGraphAfterInlining(CallSite CS, - Function::iterator FirstNewBlock, - ValueToValueMapTy &VMap, - InlineFunctionInfo &IFI) { - CallGraph &CG = *IFI.CG; - const Function *Caller = CS.getCaller(); - const Function *Callee = CS.getCalledFunction(); - CallGraphNode *CalleeNode = CG[Callee]; - CallGraphNode *CallerNode = CG[Caller]; - - // Since we inlined some uninlined call sites in the callee into the caller, - // add edges from the caller to all of the callees of the callee. - CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); - - // Consider the case where CalleeNode == CallerNode. - CallGraphNode::CalledFunctionsVector CallCache; - if (CalleeNode == CallerNode) { - CallCache.assign(I, E); - I = CallCache.begin(); - E = CallCache.end(); - } - - for (; I != E; ++I) { - const Value *OrigCall = I->first; - - ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); - // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == nullptr) - continue; - - // If the call was inlined, but then constant folded, there is no edge to - // add. Check for this case. - auto *NewCall = dyn_cast<CallBase>(VMI->second); - if (!NewCall) - continue; - - // We do not treat intrinsic calls like real function calls because we - // expect them to become inline code; do not add an edge for an intrinsic. - if (NewCall->getCalledFunction() && - NewCall->getCalledFunction()->isIntrinsic()) - continue; - - // Remember that this call site got inlined for the client of - // InlineFunction. - IFI.InlinedCalls.push_back(NewCall); - - // It's possible that inlining the callsite will cause it to go from an - // indirect to a direct call by resolving a function pointer. If this - // happens, set the callee of the new call site to a more precise - // destination. This can also happen if the call graph node of the caller - // was just unnecessarily imprecise. - if (!I->second->getFunction()) - if (Function *F = NewCall->getCalledFunction()) { - // Indirect call site resolved to direct call. 
- CallerNode->addCalledFunction(NewCall, CG[F]); - - continue; - } - - CallerNode->addCalledFunction(NewCall, I->second); - } - - // Update the call graph by deleting the edge from Callee to Caller. We must - // do this after the loop above in case Caller and Callee are the same. - CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction())); -} - -static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, - BasicBlock *InsertBlock, - InlineFunctionInfo &IFI) { - Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); - IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); - - Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. - Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size); -} - -/// When inlining a call site that has a byval argument, -/// we have to make the implicit memcpy explicit by adding it. -static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, - const Function *CalledFunc, - InlineFunctionInfo &IFI, - unsigned ByValAlignment) { - PointerType *ArgTy = cast<PointerType>(Arg->getType()); - Type *AggTy = ArgTy->getElementType(); - - Function *Caller = TheCall->getFunction(); - const DataLayout &DL = Caller->getParent()->getDataLayout(); - - // If the called function is readonly, then it could not mutate the caller's - // copy of the byval'd memory. In this case, it is safe to elide the copy and - // temporary. - if (CalledFunc->onlyReadsMemory()) { - // If the byval argument has a specified alignment that is greater than the - // passed in pointer, then we either have to round up the input pointer or - // give up on this transformation. - if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. - return Arg; - - AssumptionCache *AC = - IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; - - // If the pointer is already known to be sufficiently aligned, or if we can - // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= - ByValAlignment) - return Arg; - - // Otherwise, we have to make a memcpy to get a safe alignment. This is bad - // for code quality, but rarely happens and is required for correctness. - } - - // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = DL.getPrefTypeAlignment(AggTy); - - // If the byval had an alignment specified, we *must* use at least that - // alignment, as it is required by the byval argument (and uses of the - // pointer inside the callee). - Align = std::max(Align, ByValAlignment); - - Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(), - nullptr, Align, Arg->getName(), - &*Caller->begin()->begin()); - IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); - - // Uses of the argument in the function should use our new alloca - // instead. - return NewAlloca; -} - -// Check whether this Value is used by a lifetime intrinsic. -static bool isUsedByLifetimeMarker(Value *V) { - for (User *U : V->users()) - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) - if (II->isLifetimeStartOrEnd()) - return true; - return false; -} - -// Check whether the given alloca already has -// lifetime.start or lifetime.end intrinsics. 
-static bool hasLifetimeMarkers(AllocaInst *AI) { - Type *Ty = AI->getType(); - Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), - Ty->getPointerAddressSpace()); - if (Ty == Int8PtrTy) - return isUsedByLifetimeMarker(AI); - - // Do a scan to find all the casts to i8*. - for (User *U : AI->users()) { - if (U->getType() != Int8PtrTy) continue; - if (U->stripPointerCasts() != AI) continue; - if (isUsedByLifetimeMarker(U)) - return true; - } - return false; -} - -/// Return the result of AI->isStaticAlloca() if AI were moved to the entry -/// block. Allocas used in inalloca calls and allocas of dynamic array size -/// cannot be static. -static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) { - return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca(); -} - -/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL -/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. -static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, - LLVMContext &Ctx, - DenseMap<const MDNode *, MDNode *> &IANodes) { - auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); - return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(), - IA); -} - -/// Returns the LoopID for a loop which has been cloned from another -/// function for inlining with the new inlined-at start and end locs. -static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt, - LLVMContext &Ctx, - DenseMap<const MDNode *, MDNode *> &IANodes) { - assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 && - "Loop ID needs at least one operand"); - assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId && - "Loop ID should refer to itself"); - - // Save space for the self-referential LoopID. - SmallVector<Metadata *, 4> MDs = {nullptr}; - - for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) { - Metadata *MD = OrigLoopId->getOperand(i); - // Update the DILocations to encode the inlined-at metadata. - if (DILocation *DL = dyn_cast<DILocation>(MD)) - MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes)); - else - MDs.push_back(MD); - } - - MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs); - // Insert the self-referential LoopID. - NewLoopID->replaceOperandWith(0, NewLoopID); - return NewLoopID; -} - -/// Update inlined instructions' line numbers to encode the location where -/// these instructions are inlined. -static void fixupLineNumbers(Function *Fn, Function::iterator FI, - Instruction *TheCall, bool CalleeHasDebugInfo) { - const DebugLoc &TheCallDL = TheCall->getDebugLoc(); - if (!TheCallDL) - return; - - auto &Ctx = Fn->getContext(); - DILocation *InlinedAtNode = TheCallDL; - - // Create a unique call site, not to be confused with any other call from the - // same location. - InlinedAtNode = DILocation::getDistinct( - Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), - InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); - - // Cache the inlined-at nodes as they're built so they are reused; without - // this, every instruction's inlined-at chain would become distinct from - // each other. - DenseMap<const MDNode *, MDNode *> IANodes; -
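// [Editor's illustrative aside; not part of the original file.] As a worked
// example of the inlined-at rewriting performed below: if the callee has an
// instruction at line 3 and it is inlined at a call site at line 10, the
// cloned instruction keeps its own line but gains an inlined-at link
// (metadata names invented):
//
//   !instloc  = !DILocation(line: 3, scope: !calleeSP, inlinedAt: !callsite)
//   !callsite = !DILocation(line: 10, scope: !callerSP)
//
// Debuggers and profilers can then attribute the instruction to both the
// callee and the caller frame.
- for (; FI != Fn->end(); ++FI) { - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - // Loop metadata needs to be updated so that the start and end locs - // reference inlined-at locations.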
- if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) { - MDNode *NewLoopID = - inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes); - BI->setMetadata(LLVMContext::MD_loop, NewLoopID); - } - - if (DebugLoc DL = BI->getDebugLoc()) { - DebugLoc IDL = - inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes); - BI->setDebugLoc(IDL); - continue; - } - - if (CalleeHasDebugInfo) - continue; - - // If the inlined instruction has no line number, make it look as if it - // originates from the call location. This is important for - // ((__always_inline__, __nodebug__)) functions which must use caller - // location for all instructions in their function body. - - // Don't update static allocas, as they may get moved later. - if (auto *AI = dyn_cast<AllocaInst>(BI)) - if (allocaWouldBeStaticInEntry(AI)) - continue; - - BI->setDebugLoc(TheCallDL); - } - } -} - -/// Update the block frequencies of the caller after a callee has been inlined. -/// -/// Each block cloned into the caller has its block frequency scaled by the -/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of -/// the callee's entry block gets the same frequency as the callsite block and -/// the relative frequencies of all cloned blocks remain the same after cloning. -static void updateCallerBFI(BasicBlock *CallSiteBlock, - const ValueToValueMapTy &VMap, - BlockFrequencyInfo *CallerBFI, - BlockFrequencyInfo *CalleeBFI, - const BasicBlock &CalleeEntryBlock) { - SmallPtrSet<BasicBlock *, 16> ClonedBBs; - for (auto const &Entry : VMap) { - if (!isa<BasicBlock>(Entry.first) || !Entry.second) - continue; - auto *OrigBB = cast<BasicBlock>(Entry.first); - auto *ClonedBB = cast<BasicBlock>(Entry.second); - uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); - if (!ClonedBBs.insert(ClonedBB).second) { - // Multiple blocks in the callee might get mapped to one cloned block in - // the caller since we prune the callee as we clone it. When that happens, - // we want to use the maximum among the original blocks' frequencies. - uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); - if (NewFreq > Freq) - Freq = NewFreq; - } - CallerBFI->setBlockFreq(ClonedBB, Freq); - } - BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); - CallerBFI->setBlockFreqAndScale( - EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), - ClonedBBs); -} - -/// Update the branch metadata for cloned call instructions. -static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, - const ProfileCount &CalleeEntryCount, - const Instruction *TheCall, - ProfileSummaryInfo *PSI, - BlockFrequencyInfo *CallerBFI) { - if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || - CalleeEntryCount.getCount() < 1) - return; - auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; - int64_t CallCount = - std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, - CalleeEntryCount.getCount()); - updateProfileCallee(Callee, -CallCount, &VMap); -} - -void llvm::updateProfileCallee( - Function *Callee, int64_t entryDelta, - const ValueMap<const Value *, WeakTrackingVH> *VMap) { - auto CalleeCount = Callee->getEntryCount(); - if (!CalleeCount.hasValue()) - return; - - uint64_t priorEntryCount = CalleeCount.getCount(); - uint64_t newEntryCount; - - // Since CallSiteCount is an estimate, it could exceed the original callee - // count; in that case the new count would underflow, so clamp it to 0.
- if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount) - newEntryCount = 0; - else - newEntryCount = priorEntryCount + entryDelta; - - Callee->setEntryCount(newEntryCount); - - // If a VMap is provided, we are in the middle of inlining: scale the - // profile weights of the cloned call sites by the entry count that was - // transferred to the caller. - if (VMap) { - uint64_t cloneEntryCount = priorEntryCount - newEntryCount; - for (auto const &Entry : *VMap) - if (isa<CallInst>(Entry.first)) - if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) - CI->updateProfWeight(cloneEntryCount, priorEntryCount); - } - for (BasicBlock &BB : *Callee) - // No need to update the callsite if it is pruned during inlining. - if (!VMap || VMap->count(&BB)) - for (Instruction &I : BB) - if (CallInst *CI = dyn_cast<CallInst>(&I)) - CI->updateProfWeight(newEntryCount, priorEntryCount); -} - -/// This function inlines the called function into the basic block of the -/// caller. This returns false if it is not possible to inline this call. -/// The program is still in a well-defined state if this occurs though. -/// -/// Note that this only does one level of inlining. For example, if the -/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -/// exists in the instruction stream. Similarly this will inline a recursive -/// function by one level. -llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, - bool InsertLifetime, - Function *ForwardVarArgsTo) { - Instruction *TheCall = CS.getInstruction(); - assert(TheCall->getParent() && TheCall->getFunction() - && "Instruction not in function!"); - - // FIXME: we don't inline callbr yet. - if (isa<CallBrInst>(TheCall)) - return false; - - // If IFI has any state in it, zap it before we fill it in. - IFI.reset(); - - Function *CalledFunc = CS.getCalledFunction(); - if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration()) // call! - return "external or indirect"; - - // The inliner does not know how to inline through calls with operand bundles - // in general ... - if (CS.hasOperandBundles()) { - for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { - uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); - // ... but it knows how to inline through "deopt" operand bundles ... - if (Tag == LLVMContext::OB_deopt) - continue; - // ... and "funclet" operand bundles. - if (Tag == LLVMContext::OB_funclet) - continue; - - return "unsupported operand bundle"; - } - } - - // If the call to the callee cannot throw, set the 'nounwind' flag on any - // calls that we inline. - bool MarkNoUnwind = CS.doesNotThrow(); - - BasicBlock *OrigBB = TheCall->getParent(); - Function *Caller = OrigBB->getParent(); - - // GC poses two hazards to inlining, which only occur when the callee has GC: - // 1. If the caller has no GC, then the callee's GC must be propagated to the - // caller. - // 2. If the caller has a differing GC, it is invalid to inline. - if (CalledFunc->hasGC()) { - if (!Caller->hasGC()) - Caller->setGC(CalledFunc->getGC()); - else if (CalledFunc->getGC() != Caller->getGC()) - return "incompatible GC"; - } - - // Get the personality function from the callee if it contains a landing pad. - Constant *CalledPersonality = - CalledFunc->hasPersonalityFn() - ? CalledFunc->getPersonalityFn()->stripPointerCasts() - : nullptr; -
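// [Editor's illustrative aside; not part of the original source. A minimal,
// hypothetical driver showing how a transform typically invokes this entry
// point; the trailing parameters default to nullptr/true in the header:]
//
//   InlineFunctionInfo IFI; // no call graph or BFI updates requested
//   InlineResult Res = InlineFunction(CS, IFI);
//   if (!Res)
//     ; // inlining refused; Res carries a short reason string
//   // On success the call site is gone and, when no call graph was supplied,
//   // IFI.InlinedCallSites lists the call sites cloned into the caller.
//
- // Find the personality function used by the landing pads of the caller. If it - // exists, then check that it matches the personality function used in - // the callee. - Constant *CallerPersonality = - Caller->hasPersonalityFn() - ?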
Caller->getPersonalityFn()->stripPointerCasts() - : nullptr; - if (CalledPersonality) { - if (!CallerPersonality) - Caller->setPersonalityFn(CalledPersonality); - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. - else if (CalledPersonality != CallerPersonality) - return "incompatible personality"; - } - - // We need to figure out which funclet the callsite was in so that we may - // properly nest the callee. - Instruction *CallSiteEHPad = nullptr; - if (CallerPersonality) { - EHPersonality Personality = classifyEHPersonality(CallerPersonality); - if (isScopedEHPersonality(Personality)) { - Optional<OperandBundleUse> ParentFunclet = - CS.getOperandBundle(LLVMContext::OB_funclet); - if (ParentFunclet) - CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); - - // OK, the inlining site is legal. What about the target function? - - if (CallSiteEHPad) { - if (Personality == EHPersonality::MSVC_CXX) { - // The MSVC personality cannot tolerate catches getting inlined into - // cleanup funclets. - if (isa<CleanupPadInst>(CallSiteEHPad)) { - // Ok, the call site is within a cleanuppad. Let's check the callee - // for catchpads. - for (const BasicBlock &CalledBB : *CalledFunc) { - if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) - return "catch in cleanup funclet"; - } - } - } else if (isAsynchronousEHPersonality(Personality)) { - // SEH is even less tolerant; there may not be any sort of exceptional - // funclet in the callee. - for (const BasicBlock &CalledBB : *CalledFunc) { - if (CalledBB.isEHPad()) - return "SEH in cleanup funclet"; - } - } - } - } - } - - // Determine if we are dealing with a call in an EHPad which does not unwind - // to caller. - bool EHPadForCallUnwindsLocally = false; - if (CallSiteEHPad && CS.isCall()) { - UnwindDestMemoTy FuncletUnwindMap; - Value *CallSiteUnwindDestToken = - getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); - - EHPadForCallUnwindsLocally = - CallSiteUnwindDestToken && - !isa<ConstantTokenNone>(CallSiteUnwindDestToken); - } - - // Get an iterator to the last basic block in the function, which will have - // the new function inlined after it. - Function::iterator LastBlock = --Caller->end(); - - // Make sure to capture all of the return instructions from the cloned - // function. - SmallVector<ReturnInst*, 8> Returns; - ClonedCodeInfo InlinedFunctionInfo; - Function::iterator FirstNewBlock; - - { // Scope to destroy VMap after cloning. - ValueToValueMapTy VMap; - // Keep a list of (dst, src) pairs to emit byval initializations. - SmallVector<std::pair<Value*, Value*>, 4> ByValInit; - - auto &DL = Caller->getParent()->getDataLayout(); - - // Calculate the vector of arguments to pass into the function cloner, which - // matches up the formal to the actual argument values. - CallSite::arg_iterator AI = CS.arg_begin(); - unsigned ArgNo = 0; -
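// [Editor's illustrative aside; not part of the original source.] For a byval
// argument, the loop below substitutes a caller-side copy; the IR that
// HandleByValArgument/HandleByValArgumentInit materialize is essentially
// (types and names invented):
//
//   %agg.tmp = alloca %struct.T, align <max(preferred, byval align)>
//   ; at the top of the inlined body:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp, i8* %actual,
//                                        i64 <store size of %struct.T>, i1 false)
//
// and %agg.tmp then stands in for the formal argument in the cloned code,
// unless the callee only reads memory, in which case the copy can be elided.
- for (Function::arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { - Value *ActualArg = *AI; - - // When byval arguments are actually inlined, we need to make the copy implied - // by them explicit. However, we don't do this if the callee is readonly - // or readnone, because the copy would be unneeded: the callee doesn't - // modify the struct.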
- if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo)); - if (ActualArg != *AI) - ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); - } - - VMap[&*I] = ActualArg; - } - - // Add alignment assumptions if necessary. We do this before the inlined - // instructions are actually cloned into the caller so that we can easily - // check what will be known at the start of the inlined code. - AddAlignmentAssumptions(CS, IFI); - - // We want the inliner to prune the code as it copies. We would LOVE to - // have no dead or constant instructions leftover after inlining occurs - // (which can happen, e.g., because an argument was constant), but we'll be - // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, - /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall); - // Remember the first block that is newly cloned over. - FirstNewBlock = LastBlock; ++FirstNewBlock; - - if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) - // Update the BFI of blocks cloned into the caller. - updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, - CalledFunc->front()); - - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, - IFI.PSI, IFI.CallerBFI); - - // Inject byval arguments initialization. - for (std::pair<Value*, Value*> &Init : ByValInit) - HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), - &*FirstNewBlock, IFI); - - Optional<OperandBundleUse> ParentDeopt = - CS.getOperandBundle(LLVMContext::OB_deopt); - if (ParentDeopt) { - SmallVector<OperandBundleDef, 2> OpDefs; - - for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { - Instruction *I = dyn_cast_or_null<Instruction>(VH); - if (!I) continue; // instruction was DCE'd or RAUW'ed to undef - - OpDefs.clear(); - - CallSite ICS(I); - OpDefs.reserve(ICS.getNumOperandBundles()); - - for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { - auto ChildOB = ICS.getOperandBundleAt(i); - if (ChildOB.getTagID() != LLVMContext::OB_deopt) { - // If the inlined call has other operand bundles, let them be - OpDefs.emplace_back(ChildOB); - continue; - } - - // It may be useful to separate this logic (of handling operand - // bundles) out to a separate "policy" component if this gets crowded. - // Prepend the parent's deoptimization continuation to the newly - // inlined call's deoptimization continuation. - std::vector<Value *> MergedDeoptArgs; - MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + - ChildOB.Inputs.size()); - - MergedDeoptArgs.insert(MergedDeoptArgs.end(), - ParentDeopt->Inputs.begin(), - ParentDeopt->Inputs.end()); - MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), - ChildOB.Inputs.end()); - - OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); - } - - Instruction *NewI = nullptr; - if (isa<CallInst>(I)) - NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); - else if (isa<CallBrInst>(I)) - NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); - else - NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); - - // Note: the RAUW does the appropriate fixup in VMap, so we need to do - // this even if the call returns void. - I->replaceAllUsesWith(NewI); - - VH = nullptr; - I->eraseFromParent(); - } - } - - // Update the callgraph if requested. 
- if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); - - // For 'nodebug' functions, the associated DISubprogram is always null. - // Conservatively avoid propagating the callsite debug location to - // instructions inlined from a function whose DISubprogram is not null. - fixupLineNumbers(Caller, FirstNewBlock, TheCall, - CalledFunc->getSubprogram() != nullptr); - - // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CS, VMap); - - // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); - - // Propagate llvm.mem.parallel_loop_access if necessary. - PropagateParallelLoopAccessMetadata(CS, VMap); - - // Register any cloned assumptions. - if (IFI.GetAssumptionCache) - for (BasicBlock &NewBlock : - make_range(FirstNewBlock->getIterator(), Caller->end())) - for (Instruction &I : NewBlock) { - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II); - } - } - - // If there are any alloca instructions in the block that used to be the entry - // block for the callee, move them to the entry block of the caller. First - // calculate which instruction they should be inserted before. We insert the - // instructions at the end of the current alloca list. - { - BasicBlock::iterator InsertPoint = Caller->begin()->begin(); - for (BasicBlock::iterator I = FirstNewBlock->begin(), - E = FirstNewBlock->end(); I != E; ) { - AllocaInst *AI = dyn_cast<AllocaInst>(I++); - if (!AI) continue; - - // If the alloca is now dead, remove it. This often occurs due to code - // specialization. - if (AI->use_empty()) { - AI->eraseFromParent(); - continue; - } - - if (!allocaWouldBeStaticInEntry(AI)) - continue; - - // Keep track of the static allocas that we inline into the caller. - IFI.StaticAllocas.push_back(AI); - - // Scan for the block of allocas that we can move over, and move them - // all at once. - while (isa<AllocaInst>(I) && - allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) { - IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); - ++I; - } - - // Transfer all of the allocas over in a block. Using splice means - // that the instructions aren't removed from the symbol table, then - // reinserted. - Caller->getEntryBlock().getInstList().splice( - InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); - } - // Move any dbg.declares describing the allocas into the entry basic block. - DIBuilder DIB(*Caller->getParent()); - for (auto &AI : IFI.StaticAllocas) - replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0); - } - - SmallVector<Value*,4> VarArgsToForward; - SmallVector<AttributeSet, 4> VarArgsAttrs; - for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); - i < CS.getNumArgOperands(); i++) { - VarArgsToForward.push_back(CS.getArgOperand(i)); - VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); - } - - bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; - if (InlinedFunctionInfo.ContainsCalls) { - CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; - if (CallInst *CI = dyn_cast<CallInst>(TheCall)) - CallSiteTailKind = CI->getTailCallKind(); - - // For inlining purposes, the "notail" marker is the same as no marker. 
- if (CallSiteTailKind == CallInst::TCK_NoTail) - CallSiteTailKind = CallInst::TCK_None; - - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; - ++BB) { - for (auto II = BB->begin(); II != BB->end();) { - Instruction &I = *II++; - CallInst *CI = dyn_cast<CallInst>(&I); - if (!CI) - continue; - - // Forward varargs from inlined call site to calls to the - // ForwardVarArgsTo function, if requested, and to musttail calls. - if (!VarArgsToForward.empty() && - ((ForwardVarArgsTo && - CI->getCalledFunction() == ForwardVarArgsTo) || - CI->isMustTailCall())) { - // Collect attributes for non-vararg parameters. - AttributeList Attrs = CI->getAttributes(); - SmallVector<AttributeSet, 8> ArgAttrs; - if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { - for (unsigned ArgNo = 0; - ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) - ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); - } - - // Add VarArg attributes. - ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); - Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), - Attrs.getRetAttributes(), ArgAttrs); - // Add VarArgs to existing parameters. - SmallVector<Value *, 6> Params(CI->arg_operands()); - Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); - CallInst *NewCI = CallInst::Create( - CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); - NewCI->setDebugLoc(CI->getDebugLoc()); - NewCI->setAttributes(Attrs); - NewCI->setCallingConv(CI->getCallingConv()); - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); - CI = NewCI; - } - - if (Function *F = CI->getCalledFunction()) - InlinedDeoptimizeCalls |= - F->getIntrinsicID() == Intrinsic::experimental_deoptimize; - - // We need to reduce the strength of any inlined tail calls. For - // musttail, we have to avoid introducing potential unbounded stack - // growth. For example, if functions 'f' and 'g' are mutually recursive - // with musttail, we can inline 'g' into 'f' so long as we preserve - // musttail on the cloned call to 'f'. If either the inlined call site - // or the cloned call site is *not* musttail, the program already has - // one frame of stack growth, so it's safe to remove musttail. Here is - // a table of example transformations: - // - // f -> musttail g -> musttail f ==> f -> musttail f - // f -> musttail g -> tail f ==> f -> tail f - // f -> g -> musttail f ==> f -> f - // f -> g -> tail f ==> f -> f - // - // Inlined notail calls should remain notail calls. - CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); - if (ChildTCK != CallInst::TCK_NoTail) - ChildTCK = std::min(CallSiteTailKind, ChildTCK); - CI->setTailCallKind(ChildTCK); - InlinedMustTailCalls |= CI->isMustTailCall(); - - // Calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (MarkNoUnwind) - CI->setDoesNotThrow(); - } - } - } - - // Leave lifetime markers for the static alloca's, scoping them to the - // function we just inlined. - if (InsertLifetime && !IFI.StaticAllocas.empty()) { - IRBuilder<> builder(&FirstNewBlock->front()); - for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { - AllocaInst *AI = IFI.StaticAllocas[ai]; - // Don't mark swifterror allocas. They can't have bitcast uses. - if (AI->isSwiftError()) - continue; - - // If the alloca is already scoped to something smaller than the whole - // function then there's no need to add redundant, less accurate markers. - if (hasLifetimeMarkers(AI)) - continue; - - // Try to determine the size of the allocation. 
- ConstantInt *AllocaSize = nullptr; - if (ConstantInt *AIArraySize = - dyn_cast<ConstantInt>(AI->getArraySize())) { - auto &DL = Caller->getParent()->getDataLayout(); - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - - // Don't add markers for zero-sized allocas. - if (AllocaArraySize == 0) - continue; - - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. - if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && - std::numeric_limits<uint64_t>::max() / AllocaArraySize >= - AllocaTypeSize) { - AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), - AllocaArraySize * AllocaTypeSize); - } - } - - builder.CreateLifetimeStart(AI, AllocaSize); - for (ReturnInst *RI : Returns) { - // Don't insert llvm.lifetime.end calls between a musttail or deoptimize - // call and a return. The return kills all local allocas. - if (InlinedMustTailCalls && - RI->getParent()->getTerminatingMustTailCall()) - continue; - if (InlinedDeoptimizeCalls && - RI->getParent()->getTerminatingDeoptimizeCall()) - continue; - IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); - } - } - } - - // If the inlined code contained dynamic alloca instructions, wrap the inlined - // code with llvm.stacksave/llvm.stackrestore intrinsics. - if (InlinedFunctionInfo.ContainsDynamicAllocas) { - Module *M = Caller->getParent(); - // Get the two intrinsics we care about. - Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); - - // Insert the llvm.stacksave. - CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) - .CreateCall(StackSave, {}, "savedstack"); - - // Insert a call to llvm.stackrestore before any return instructions in the - // inlined function. - for (ReturnInst *RI : Returns) { - // Don't insert llvm.stackrestore calls between a musttail or deoptimize - // call and a return. The return will restore the stack pointer. - if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) - continue; - if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) - continue; - IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); - } - } - - // If we are inlining for an invoke instruction, we must make sure to rewrite - // any call instructions into invoke instructions. This is sensitive to which - // funclet pads were top-level in the inlinee, so must be done before - // rewriting the "parent pad" links. - if (auto *II = dyn_cast<InvokeInst>(TheCall)) { - BasicBlock *UnwindDest = II->getUnwindDest(); - Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); - if (isa<LandingPadInst>(FirstNonPHI)) { - HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); - } else { - HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); - } - } - - // Update the lexical scopes of the new funclets and callsites. - // Anything that had 'none' as its parent is now nested inside the callsite's - // EHPad. - - if (CallSiteEHPad) { - for (Function::iterator BB = FirstNewBlock->getIterator(), - E = Caller->end(); - BB != E; ++BB) { - // Add bundle operands to any top-level call sites. 
- SmallVector<OperandBundleDef, 1> OpBundles; - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { - Instruction *I = &*BBI++; - CallSite CS(I); - if (!CS) - continue; - - // Skip call sites which are nounwind intrinsics. - auto *CalledFn = - dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) - continue; - - // Skip call sites which already have a "funclet" bundle. - if (CS.getOperandBundle(LLVMContext::OB_funclet)) - continue; - - CS.getOperandBundlesAsDefs(OpBundles); - OpBundles.emplace_back("funclet", CallSiteEHPad); - - Instruction *NewInst; - if (CS.isCall()) - NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); - else if (CS.isCallBr()) - NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); - else - NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); - NewInst->takeName(I); - I->replaceAllUsesWith(NewInst); - I->eraseFromParent(); - - OpBundles.clear(); - } - - // It is problematic if the inlinee has a cleanupret which unwinds to - // caller and we inline it into a call site which doesn't unwind but into - // an EH pad that does. Such an edge must be dynamically unreachable. - // As such, we replace the cleanupret with unreachable. - if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator())) - if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) - changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false); - - Instruction *I = BB->getFirstNonPHI(); - if (!I->isEHPad()) - continue; - - if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { - if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) - CatchSwitch->setParentPad(CallSiteEHPad); - } else { - auto *FPI = cast<FuncletPadInst>(I); - if (isa<ConstantTokenNone>(FPI->getParentPad())) - FPI->setParentPad(CallSiteEHPad); - } - } - } - - if (InlinedDeoptimizeCalls) { - // We need to at least remove the deoptimizing returns from the Return set, - // so that the control flow from those returns does not get merged into the - // caller (but terminate it instead). If the caller's return type does not - // match the callee's return type, we also need to change the return type of - // the intrinsic. - if (Caller->getReturnType() == TheCall->getType()) { - auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { - return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; - }); - Returns.erase(NewEnd, Returns.end()); - } else { - SmallVector<ReturnInst *, 8> NormalReturns; - Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( - Caller->getParent(), Intrinsic::experimental_deoptimize, - {Caller->getReturnType()}); - - for (ReturnInst *RI : Returns) { - CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); - if (!DeoptCall) { - NormalReturns.push_back(RI); - continue; - } - - // The calling convention on the deoptimize call itself may be bogus, - // since the code we're inlining may have undefined behavior (and may - // never actually execute at runtime); but all - // @llvm.experimental.deoptimize declarations have to have the same - // calling convention in a well-formed module. 
- auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); - NewDeoptIntrinsic->setCallingConv(CallingConv); - auto *CurBB = RI->getParent(); - RI->eraseFromParent(); - - SmallVector<Value *, 4> CallArgs(DeoptCall->arg_begin(), - DeoptCall->arg_end()); - - SmallVector<OperandBundleDef, 1> OpBundles; - DeoptCall->getOperandBundlesAsDefs(OpBundles); - DeoptCall->eraseFromParent(); - assert(!OpBundles.empty() && - "Expected at least the deopt operand bundle"); - - IRBuilder<> Builder(CurBB); - CallInst *NewDeoptCall = - Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); - NewDeoptCall->setCallingConv(CallingConv); - if (NewDeoptCall->getType()->isVoidTy()) - Builder.CreateRetVoid(); - else - Builder.CreateRet(NewDeoptCall); - } - - // Leave behind the normal returns so we can merge control flow. - std::swap(Returns, NormalReturns); - } - } - - // Handle any inlined musttail call sites. In order for a new call site to be - // musttail, the source of the clone and the inlined call site must have been - // musttail. Therefore it's safe to return without merging control into the - // phi below. - if (InlinedMustTailCalls) { - // Check if we need to bitcast the result of any musttail calls. - Type *NewRetTy = Caller->getReturnType(); - bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; - - // Handle the returns preceded by musttail calls separately. - SmallVector<ReturnInst *, 8> NormalReturns; - for (ReturnInst *RI : Returns) { - CallInst *ReturnedMustTail = - RI->getParent()->getTerminatingMustTailCall(); - if (!ReturnedMustTail) { - NormalReturns.push_back(RI); - continue; - } - if (!NeedBitCast) - continue; - - // Delete the old return and any preceding bitcast. - BasicBlock *CurBB = RI->getParent(); - auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); - RI->eraseFromParent(); - if (OldCast) - OldCast->eraseFromParent(); - - // Insert a new bitcast and return with the right type. - IRBuilder<> Builder(CurBB); - Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); - } - - // Leave behind the normal returns so we can merge control flow. - std::swap(Returns, NormalReturns); - } - - // Now that all of the transforms on the inlined code have taken place but - // before we splice the inlined code into the CFG and lose track of which - // blocks were actually inlined, collect the call sites. We only do this if - // call graph updates weren't requested, as those provide value handle based - // tracking of inlined call sites instead. - if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { - // Otherwise just collect the raw call sites that were inlined. - for (BasicBlock &NewBB : - make_range(FirstNewBlock->getIterator(), Caller->end())) - for (Instruction &I : NewBB) - if (auto CS = CallSite(&I)) - IFI.InlinedCallSites.push_back(CS); - } - - // If we cloned in _exactly one_ basic block, and if that block ends in a - // return instruction, we splice the body of the inlined callee directly into - // the calling basic block. - if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { - // Move all of the instructions right before the call. - OrigBB->getInstList().splice(TheCall->getIterator(), - FirstNewBlock->getInstList(), - FirstNewBlock->begin(), FirstNewBlock->end()); - // Remove the cloned basic block. - Caller->getBasicBlockList().pop_back(); - - // If the call site was an invoke instruction, add a branch to the normal - // destination. 
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); - NewBr->setDebugLoc(Returns[0]->getDebugLoc()); - } - - // If the return instruction returned a value, replace uses of the call with - // uses of the returned value. - if (!TheCall->use_empty()) { - ReturnInst *R = Returns[0]; - if (TheCall == R->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); - else - TheCall->replaceAllUsesWith(R->getReturnValue()); - } - // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); - - // Since we are now done with the return instruction, delete it also. - Returns[0]->eraseFromParent(); - - // We are now done with the inlining. - return true; - } - - // Otherwise, we have the normal case, of more than one block to inline or - // multiple return sites. - - // We want to clone the entire callee function into the hole between the - // "starter" and "ender" blocks. How we accomplish this depends on whether - // this is an invoke instruction or a call instruction. - BasicBlock *AfterCallBB; - BranchInst *CreatedBranchToNormalDest = nullptr; - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { - - // Add an unconditional branch to make this look like the CallInst case... - CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); - - // Split the basic block. This guarantees that no PHI nodes will have to be - // updated due to new incoming edges, and make the invoke case more - // symmetric to the call case. - AfterCallBB = - OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), - CalledFunc->getName() + ".exit"); - - } else { // It's a call - // If this is a call instruction, we need to split the basic block that - // the call lives in. - // - AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), - CalledFunc->getName() + ".exit"); - } - - if (IFI.CallerBFI) { - // Copy original BB's block frequency to AfterCallBB - IFI.CallerBFI->setBlockFreq( - AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); - } - - // Change the branch that used to go to AfterCallBB to branch to the first - // basic block of the inlined function. - // - Instruction *Br = OrigBB->getTerminator(); - assert(Br && Br->getOpcode() == Instruction::Br && - "splitBasicBlock broken!"); - Br->setOperand(0, &*FirstNewBlock); - - // Now that the function is correct, make it a little bit nicer. In - // particular, move the basic blocks inserted from the end of the function - // into the space made by splitting the source basic block. - Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), - Caller->getBasicBlockList(), FirstNewBlock, - Caller->end()); - - // Handle all of the return instructions that we just cloned in, and eliminate - // any users of the original call/invoke instruction. - Type *RTy = CalledFunc->getReturnType(); - - PHINode *PHI = nullptr; - if (Returns.size() > 1) { - // The PHI node should go at the front of the new basic block to merge all - // possible incoming values. - if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), - &AfterCallBB->front()); - // Anything that used the result of the function call should now use the - // PHI node as their operand. - TheCall->replaceAllUsesWith(PHI); - } - - // Loop over all of the return instructions adding entries to the PHI node - // as appropriate. 
- if (PHI) { - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - ReturnInst *RI = Returns[i]; - assert(RI->getReturnValue()->getType() == PHI->getType() && - "Ret value not consistent in function!"); - PHI->addIncoming(RI->getReturnValue(), RI->getParent()); - } - } - - // Add a branch to the merge points and remove return instructions. - DebugLoc Loc; - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - ReturnInst *RI = Returns[i]; - BranchInst* BI = BranchInst::Create(AfterCallBB, RI); - Loc = RI->getDebugLoc(); - BI->setDebugLoc(Loc); - RI->eraseFromParent(); - } - // We need to set the debug location to *somewhere* inside the - // inlined function. The line number may be nonsensical, but the - // instruction will at least be associated with the right - // function. - if (CreatedBranchToNormalDest) - CreatedBranchToNormalDest->setDebugLoc(Loc); - } else if (!Returns.empty()) { - // Otherwise, if there is exactly one return value, just replace anything - // using the return value of the call with the computed value. - if (!TheCall->use_empty()) { - if (TheCall == Returns[0]->getReturnValue()) - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); - else - TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); - } - - // Update PHI nodes that use the ReturnBB to use the AfterCallBB. - BasicBlock *ReturnBB = Returns[0]->getParent(); - ReturnBB->replaceAllUsesWith(AfterCallBB); - - // Splice the code from the return block into the block that it will return - // to, which contains the code that was after the call. - AfterCallBB->getInstList().splice(AfterCallBB->begin(), - ReturnBB->getInstList()); - - if (CreatedBranchToNormalDest) - CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); - - // Delete the return instruction now and empty ReturnBB now. - Returns[0]->eraseFromParent(); - ReturnBB->eraseFromParent(); - } else if (!TheCall->use_empty()) { - // No returns, but something is using the return value of the call. Just - // nuke the result. - TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); - } - - // Since we are now done with the Call/Invoke, we can delete it. - TheCall->eraseFromParent(); - - // If we inlined any musttail calls and the original return is now - // unreachable, delete it. It can only contain a bitcast and ret. - if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) - AfterCallBB->eraseFromParent(); - - // We should always be able to fold the entry block of the function into the - // single predecessor of the block... - assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); - BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); - - // Splice the code entry block into calling block, right before the - // unconditional branch. - CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes - OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); - - // Remove the unconditional branch. - OrigBB->getInstList().erase(Br); - - // Now we can remove the CalleeEntry block, which is now empty. - Caller->getBasicBlockList().erase(CalleeEntry); - - // If we inserted a phi node, check to see if it has a single value (e.g. all - // the entries are the same or undef). If so, remove the PHI so it doesn't - // block other optimizations. - if (PHI) { - AssumptionCache *AC = - IFI.GetAssumptionCache ? 
&(*IFI.GetAssumptionCache)(*Caller) : nullptr; - auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { - PHI->replaceAllUsesWith(V); - PHI->eraseFromParent(); - } - } - - return true; -} diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp deleted file mode 100644 index 6c4fc1ceb991..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===- InstructionNamer.cpp - Give anonymous instructions names -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a little utility pass that gives instructions names, this is mostly -// useful when diffing the effect of an optimization because deleting an -// unnamed instruction can change all other instruction numbering, making the -// diff very noisy. -// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils.h" -using namespace llvm; - -namespace { - struct InstNamer : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - InstNamer() : FunctionPass(ID) { - initializeInstNamerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override { - Info.setPreservesAll(); - } - - bool runOnFunction(Function &F) override { - for (auto &Arg : F.args()) - if (!Arg.hasName()) - Arg.setName("arg"); - - for (BasicBlock &BB : F) { - if (!BB.hasName()) - BB.setName("bb"); - - for (Instruction &I : BB) - if (!I.hasName() && !I.getType()->isVoidTy()) - I.setName("tmp"); - } - return true; - } - }; - - char InstNamer::ID = 0; -} - -INITIALIZE_PASS(InstNamer, "instnamer", - "Assign names to anonymous instructions", false, false) -char &llvm::InstructionNamerID = InstNamer::ID; -//===----------------------------------------------------------------------===// -// -// InstructionNamer - Give any unnamed non-void instructions "tmp" names. -// -FunctionPass *llvm::createInstructionNamerPass() { - return new InstNamer(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp deleted file mode 100644 index 9082049c82da..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ /dev/null @@ -1,673 +0,0 @@ -//===-- IntegerDivision.cpp - Expand integer division ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an implementation of 32bit and 64bit scalar integer -// division for targets that don't have native support. It's largely derived -// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, -// but hand-tuned for targets that prefer less control flow. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/IntegerDivision.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include <utility> - -using namespace llvm; - -#define DEBUG_TYPE "integer-division" - -/// Generate code to compute the remainder of two signed integers. Returns the -/// remainder, which will have the sign of the dividend. Builder's insert point -/// should be pointing where the caller wants code generated, e.g. at the srem -/// instruction. This will generate a urem in the process, and Builder's insert -/// point will be pointing at the urem (if present, i.e. not folded), ready to -/// be expanded if the user wishes. -static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, - IRBuilder<> &Builder) { - unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); - ConstantInt *Shift; - - if (BitWidth == 64) { - Shift = Builder.getInt64(63); - } else { - assert(BitWidth == 32 && "Unexpected bit width"); - Shift = Builder.getInt32(31); - } - - // Following instructions are generated for both i32 (shift 31) and - // i64 (shift 63). - - // ; %dividend_sgn = ashr i32 %dividend, 31 - // ; %divisor_sgn = ashr i32 %divisor, 31 - // ; %dvd_xor = xor i32 %dividend, %dividend_sgn - // ; %dvs_xor = xor i32 %divisor, %divisor_sgn - // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn - // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn - // ; %urem = urem i32 %dividend, %divisor - // ; %xored = xor i32 %urem, %dividend_sgn - // ; %srem = sub i32 %xored, %dividend_sgn - Value *DividendSign = Builder.CreateAShr(Dividend, Shift); - Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); - Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); - Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); - Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); - Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); - Value *URem = Builder.CreateURem(UDividend, UDivisor); - Value *Xored = Builder.CreateXor(URem, DividendSign); - Value *SRem = Builder.CreateSub(Xored, DividendSign); - - if (Instruction *URemInst = dyn_cast<Instruction>(URem)) - Builder.SetInsertPoint(URemInst); - - return SRem; -} - - -/// Generate code to compute the remainder of two unsigned integers. Returns the -/// remainder. Builder's insert point should be pointing where the caller wants -/// code generated, e.g. at the urem instruction. This will generate a udiv in -/// the process, and Builder's insert point will be pointing at the udiv (if -/// present, i.e. not folded), ready to be expanded if the user wishes. -static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, - IRBuilder<> &Builder) { - // Remainder = Dividend - Quotient*Divisor - - // Following instructions are generated for both i32 and i64 - - // ; %quotient = udiv i32 %dividend, %divisor - // ; %product = mul i32 %divisor, %quotient - // ; %remainder = sub i32 %dividend, %product - Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); - Value *Product = Builder.CreateMul(Divisor, Quotient); - Value *Remainder = Builder.CreateSub(Dividend, Product); - - if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) - Builder.SetInsertPoint(UDiv); - - return Remainder; -}
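// [Editor's illustrative aside; not part of the original file.] A worked
// example of the branch-free sign handling used by the signed expansions in
// this file, for i32 with dividend = -7 and divisor = 3:
//
//   %dividend_sgn = ashr i32 -7, 31        ; = -1 (all ones)
//   %u_dividend   = sub (xor -7, -1), -1   ; = 6 + 1 = 7, i.e. |-7|
//   %u_divisor    = 3                      ; its sign word is 0, so unchanged
//   %urem         = urem i32 7, 3          ; = 1
//   %srem         = sub (xor 1, -1), -1    ; = -2 + 1 = -1
//
// so -7 srem 3 yields -1: the remainder takes the sign of the dividend, as in
// C's truncated division.
-/// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing where the caller wants -/// code generated, e.g.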
at the sdiv instruction. This will generate a udiv in -/// the process, and Builder's insert point will be pointing at the udiv (if -/// present, i.e. not folded), ready to be expanded if the user wishes. -static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, - IRBuilder<> &Builder) { - // Implementation taken from compiler-rt's __divsi3 and __divdi3 - - unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); - ConstantInt *Shift; - - if (BitWidth == 64) { - Shift = Builder.getInt64(63); - } else { - assert(BitWidth == 32 && "Unexpected bit width"); - Shift = Builder.getInt32(31); - } - - // Following instructions are generated for both i32 (shift 31) and - // i64 (shift 63). - - // ; %tmp = ashr i32 %dividend, 31 - // ; %tmp1 = ashr i32 %divisor, 31 - // ; %tmp2 = xor i32 %tmp, %dividend - // ; %u_dvnd = sub nsw i32 %tmp2, %tmp - // ; %tmp3 = xor i32 %tmp1, %divisor - // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 - // ; %q_sgn = xor i32 %tmp1, %tmp - // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr - // ; %tmp4 = xor i32 %q_mag, %q_sgn - // ; %q = sub i32 %tmp4, %q_sgn - Value *Tmp = Builder.CreateAShr(Dividend, Shift); - Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); - Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); - Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); - Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); - Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); - Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); - Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); - Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); - Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); - - if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) - Builder.SetInsertPoint(UDiv); - - return Q; -} - -/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. -/// Returns the quotient, rounded towards 0. Builder's insert point should -/// point where the caller wants code generated, e.g. at the udiv instruction. -static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, - IRBuilder<> &Builder) { - // The basic algorithm can be found in the compiler-rt project's - // implementation of __udivsi3.c. Here, we do a lower-level IR based approach - // that's been hand-tuned to lessen the amount of control flow involved. - - // Some helper values - IntegerType *DivTy = cast<IntegerType>(Dividend->getType()); - unsigned BitWidth = DivTy->getBitWidth(); - - ConstantInt *Zero; - ConstantInt *One; - ConstantInt *NegOne; - ConstantInt *MSB; - - if (BitWidth == 64) { - Zero = Builder.getInt64(0); - One = Builder.getInt64(1); - NegOne = ConstantInt::getSigned(DivTy, -1); - MSB = Builder.getInt64(63); - } else { - assert(BitWidth == 32 && "Unexpected bit width"); - Zero = Builder.getInt32(0); - One = Builder.getInt32(1); - NegOne = ConstantInt::getSigned(DivTy, -1); - MSB = Builder.getInt32(31); - } - - ConstantInt *True = Builder.getTrue(); - - BasicBlock *IBB = Builder.GetInsertBlock(); - Function *F = IBB->getParent(); - Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - DivTy); - - // Our CFG is going to look like: - // +---------------------+ - // | special-cases | - // | ... | - // +---------------------+ - // | | - // | +----------+ - // | | bb1 | - // | | ... | - // | +----------+ - // | | | - // | | +------------+ - // | | | preheader | - // | | | ... | - // | | +------------+ - // | | | - // | | | +---+ - // | | | | | - // | | +------------+ | - // | | | do-while | | - // | | | ... 
| | - // | | +------------+ | - // | | | | | - // | +-----------+ +---+ - // | | loop-exit | - // | | ... | - // | +-----------+ - // | | - // +-------+ - // | ... | - // | end | - // +-------+ - BasicBlock *SpecialCases = Builder.GetInsertBlock(); - SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); - BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), - "udiv-end"); - BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), - "udiv-loop-exit", F, End); - BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), - "udiv-do-while", F, End); - BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), - "udiv-preheader", F, End); - BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), - "udiv-bb1", F, End); - - // We'll be overwriting the terminator to insert our extra blocks - SpecialCases->getTerminator()->eraseFromParent(); - - // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). - - // First off, check for special cases: dividend or divisor is zero, divisor - // is greater than dividend, and divisor is 1. - // ; special-cases: - // ; %ret0_1 = icmp eq i32 %divisor, 0 - // ; %ret0_2 = icmp eq i32 %dividend, 0 - // ; %ret0_3 = or i1 %ret0_1, %ret0_2 - // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) - // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) - // ; %sr = sub nsw i32 %tmp0, %tmp1 - // ; %ret0_4 = icmp ugt i32 %sr, 31 - // ; %ret0 = or i1 %ret0_3, %ret0_4 - // ; %retDividend = icmp eq i32 %sr, 31 - // ; %retVal = select i1 %ret0, i32 0, i32 %dividend - // ; %earlyRet = or i1 %ret0, %retDividend - // ; br i1 %earlyRet, label %end, label %bb1 - Builder.SetInsertPoint(SpecialCases); - Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); - Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); - Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); - Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True}); - Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); - Value *SR = Builder.CreateSub(Tmp0, Tmp1); - Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); - Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); - Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); - Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); - Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); - Builder.CreateCondBr(EarlyRet, End, BB1); - - // ; bb1: ; preds = %special-cases - // ; %sr_1 = add i32 %sr, 1 - // ; %tmp2 = sub i32 31, %sr - // ; %q = shl i32 %dividend, %tmp2 - // ; %skipLoop = icmp eq i32 %sr_1, 0 - // ; br i1 %skipLoop, label %loop-exit, label %preheader - Builder.SetInsertPoint(BB1); - Value *SR_1 = Builder.CreateAdd(SR, One); - Value *Tmp2 = Builder.CreateSub(MSB, SR); - Value *Q = Builder.CreateShl(Dividend, Tmp2); - Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); - Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); - - // ; preheader: ; preds = %bb1 - // ; %tmp3 = lshr i32 %dividend, %sr_1 - // ; %tmp4 = add i32 %divisor, -1 - // ; br label %do-while - Builder.SetInsertPoint(Preheader); - Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); - Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); - Builder.CreateBr(DoWhile); - - // ; do-while: ; preds = %do-while, %preheader - // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] - // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] - // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] - // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] - // ; %tmp5 = shl i32 %r_1, 1 - // ; 
%tmp6 = lshr i32 %q_2, 31 - // ; %tmp7 = or i32 %tmp5, %tmp6 - // ; %tmp8 = shl i32 %q_2, 1 - // ; %q_1 = or i32 %carry_1, %tmp8 - // ; %tmp9 = sub i32 %tmp4, %tmp7 - // ; %tmp10 = ashr i32 %tmp9, 31 - // ; %carry = and i32 %tmp10, 1 - // ; %tmp11 = and i32 %tmp10, %divisor - // ; %r = sub i32 %tmp7, %tmp11 - // ; %sr_2 = add i32 %sr_3, -1 - // ; %tmp12 = icmp eq i32 %sr_2, 0 - // ; br i1 %tmp12, label %loop-exit, label %do-while - Builder.SetInsertPoint(DoWhile); - PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); - PHINode *SR_3 = Builder.CreatePHI(DivTy, 2); - PHINode *R_1 = Builder.CreatePHI(DivTy, 2); - PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); - Value *Tmp5 = Builder.CreateShl(R_1, One); - Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); - Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); - Value *Tmp8 = Builder.CreateShl(Q_2, One); - Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); - Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); - Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); - Value *Carry = Builder.CreateAnd(Tmp10, One); - Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); - Value *R = Builder.CreateSub(Tmp7, Tmp11); - Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); - Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); - Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); - - // ; loop-exit: ; preds = %do-while, %bb1 - // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] - // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] - // ; %tmp13 = shl i32 %q_3, 1 - // ; %q_4 = or i32 %carry_2, %tmp13 - // ; br label %end - Builder.SetInsertPoint(LoopExit); - PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); - PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); - Value *Tmp13 = Builder.CreateShl(Q_3, One); - Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); - Builder.CreateBr(End); - - // ; end: ; preds = %loop-exit, %special-cases - // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] - // ; ret i32 %q_5 - Builder.SetInsertPoint(End, End->begin()); - PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); - - // Populate the Phis, since all values have now been created. Our Phis were: - // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] - Carry_1->addIncoming(Zero, Preheader); - Carry_1->addIncoming(Carry, DoWhile); - // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] - SR_3->addIncoming(SR_1, Preheader); - SR_3->addIncoming(SR_2, DoWhile); - // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] - R_1->addIncoming(Tmp3, Preheader); - R_1->addIncoming(R, DoWhile); - // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] - Q_2->addIncoming(Q, Preheader); - Q_2->addIncoming(Q_1, DoWhile); - // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] - Carry_2->addIncoming(Zero, BB1); - Carry_2->addIncoming(Carry, DoWhile); - // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] - Q_3->addIncoming(Q, BB1); - Q_3->addIncoming(Q_1, DoWhile); - // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] - Q_5->addIncoming(Q_4, LoopExit); - Q_5->addIncoming(RetVal, SpecialCases); - - return Q_5; -} - -/// Generate code to calculate the remainder of two integers, replacing Rem with -/// the generated code. This currently generates code using the udiv expansion, -/// but future work includes generating more specialized code, e.g. when more -/// information about the operands are known. Implements both 32bit and 64bit -/// scalar division. -/// -/// Replace Rem with generated code. 
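An editorial aside before the expansion drivers: the arithmetic that the generators above emit can be modeled in ordinary C++. This is a hedged, standalone sketch, not part of the original file; sremModel and udivModel are illustrative names, it assumes a nonzero divisor and 32-bit operands, and it uses GCC/Clang's __builtin_clz in place of the ctlz intrinsic.

#include <cstdint>

// Mirrors generateSignedRemainderCode: strip signs with the xor/sub trick,
// take an unsigned remainder, then reapply the dividend's sign.
static int32_t sremModel(int32_t Dividend, int32_t Divisor) {
  int32_t DSign = Dividend >> 31; // 0 or -1, like %dividend_sgn (assumes
  int32_t VSign = Divisor >> 31;  // arithmetic shift of negative values)
  uint32_t UDvnd = (uint32_t)(Dividend ^ DSign) - (uint32_t)DSign; // |Dividend|
  uint32_t UDvsr = (uint32_t)(Divisor ^ VSign) - (uint32_t)VSign;  // |Divisor|
  uint32_t URem = UDvnd % UDvsr;  // the urem that is left for later expansion
  return (int32_t)((URem ^ (uint32_t)DSign) - (uint32_t)DSign);    // %srem
}

// Mirrors the CFG built by generateUnsignedDivisionCode: the special cases,
// then one quotient bit per iteration with a branch-free conditional subtract.
static uint32_t udivModel(uint32_t Dividend, uint32_t Divisor) {
  if (Dividend == 0)
    return 0;                                  // special-cases block
  int SR = __builtin_clz(Divisor) - __builtin_clz(Dividend);
  if (SR < 0)
    return 0;                                  // divisor larger than dividend
  if (SR == 31)
    return Dividend;                           // divisor is 1, top bit set
  uint32_t Q = Dividend << (31 - SR);          // quotient bits, pre-shifted
  uint32_t R = Dividend >> (SR + 1);           // running remainder
  uint32_t Carry = 0;
  for (int I = SR + 1; I != 0; --I) {          // udiv-do-while
    R = (R << 1) | (Q >> 31);
    Q = (Q << 1) | Carry;
    int32_t Mask = (int32_t)(Divisor - 1 - R) >> 31; // all-ones iff R >= Divisor
    Carry = Mask & 1;
    R -= (uint32_t)Mask & Divisor;             // subtract only when R >= Divisor
  }
  return (Q << 1) | Carry;                     // udiv-loop-exit
}

The real routines emit exactly this dataflow as IR, which is what lets a target without a hardware divider expand 32-bit or 64-bit division in place.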
-bool llvm::expandRemainder(BinaryOperator *Rem) { - assert((Rem->getOpcode() == Instruction::SRem || - Rem->getOpcode() == Instruction::URem) && - "Trying to expand remainder from a non-remainder function"); - - IRBuilder<> Builder(Rem); - - assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); - assert((Rem->getType()->getIntegerBitWidth() == 32 || - Rem->getType()->getIntegerBitWidth() == 64) && - "Div of bitwidth other than 32 or 64 not supported"); - - // First prepare the sign if it's a signed remainder - if (Rem->getOpcode() == Instruction::SRem) { - Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), - Rem->getOperand(1), Builder); - - // Check whether this is the insert point while Rem is still valid. - bool IsInsertPoint = Rem->getIterator() == Builder.GetInsertPoint(); - Rem->replaceAllUsesWith(Remainder); - Rem->dropAllReferences(); - Rem->eraseFromParent(); - - // If we didn't actually generate an urem instruction, we're done - // This happens for example if the input were constant. In this case the - // Builder insertion point was unchanged - if (IsInsertPoint) - return true; - - BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); - Rem = BO; - } - - Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), - Rem->getOperand(1), - Builder); - - Rem->replaceAllUsesWith(Remainder); - Rem->dropAllReferences(); - Rem->eraseFromParent(); - - // Expand the udiv - if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { - assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); - expandDivision(UDiv); - } - - return true; -} - - -/// Generate code to divide two integers, replacing Div with the generated -/// code. This currently generates code similarly to compiler-rt's -/// implementations, but future work includes generating more specialized code -/// when more information about the operands are known. Implements both -/// 32bit and 64bit scalar division. -/// -/// Replace Div with generated code. -bool llvm::expandDivision(BinaryOperator *Div) { - assert((Div->getOpcode() == Instruction::SDiv || - Div->getOpcode() == Instruction::UDiv) && - "Trying to expand division from a non-division function"); - - IRBuilder<> Builder(Div); - - assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); - assert((Div->getType()->getIntegerBitWidth() == 32 || - Div->getType()->getIntegerBitWidth() == 64) && - "Div of bitwidth other than 32 or 64 not supported"); - - // First prepare the sign if it's a signed division - if (Div->getOpcode() == Instruction::SDiv) { - // Lower the code to unsigned division, and reset Div to point to the udiv. - Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); - - // Check whether this is the insert point while Div is still valid. - bool IsInsertPoint = Div->getIterator() == Builder.GetInsertPoint(); - Div->replaceAllUsesWith(Quotient); - Div->dropAllReferences(); - Div->eraseFromParent(); - - // If we didn't actually generate an udiv instruction, we're done - // This happens for example if the input were constant. 
In this case the - // Builder insertion point was unchanged - if (IsInsertPoint) - return true; - - BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); - Div = BO; - } - - // Insert the unsigned division code - Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), - Builder); - Div->replaceAllUsesWith(Quotient); - Div->dropAllReferences(); - Div->eraseFromParent(); - - return true; -} - -/// Generate code to compute the remainder of two integers of bitwidth up to -/// 32 bits. Uses the above routines and extends the inputs/truncates the -/// outputs to operate in 32 bits; that is, these routines are good for targets -/// that have no or very little suppport for smaller than 32 bit integer -/// arithmetic. -/// -/// Replace Rem with emulation code. -bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { - assert((Rem->getOpcode() == Instruction::SRem || - Rem->getOpcode() == Instruction::URem) && - "Trying to expand remainder from a non-remainder function"); - - Type *RemTy = Rem->getType(); - assert(!RemTy->isVectorTy() && "Div over vectors not supported"); - - unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - - assert(RemTyBitWidth <= 32 && - "Div of bitwidth greater than 32 not supported"); - - if (RemTyBitWidth == 32) - return expandRemainder(Rem); - - // If bitwidth smaller than 32 extend inputs, extend output and proceed - // with 32 bit division. - IRBuilder<> Builder(Rem); - - Value *ExtDividend; - Value *ExtDivisor; - Value *ExtRem; - Value *Trunc; - Type *Int32Ty = Builder.getInt32Ty(); - - if (Rem->getOpcode() == Instruction::SRem) { - ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty); - ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty); - ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); - } else { - ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty); - ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty); - ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); - } - Trunc = Builder.CreateTrunc(ExtRem, RemTy); - - Rem->replaceAllUsesWith(Trunc); - Rem->dropAllReferences(); - Rem->eraseFromParent(); - - return expandRemainder(cast<BinaryOperator>(ExtRem)); -} - -/// Generate code to compute the remainder of two integers of bitwidth up to -/// 64 bits. Uses the above routines and extends the inputs/truncates the -/// outputs to operate in 64 bits. -/// -/// Replace Rem with emulation code. -bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { - assert((Rem->getOpcode() == Instruction::SRem || - Rem->getOpcode() == Instruction::URem) && - "Trying to expand remainder from a non-remainder function"); - - Type *RemTy = Rem->getType(); - assert(!RemTy->isVectorTy() && "Div over vectors not supported"); - - unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - - assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); - - if (RemTyBitWidth == 64) - return expandRemainder(Rem); - - // If bitwidth smaller than 64 extend inputs, extend output and proceed - // with 64 bit division. 
- IRBuilder<> Builder(Rem); - - Value *ExtDividend; - Value *ExtDivisor; - Value *ExtRem; - Value *Trunc; - Type *Int64Ty = Builder.getInt64Ty(); - - if (Rem->getOpcode() == Instruction::SRem) { - ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); - ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); - ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); - } else { - ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); - ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); - ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); - } - Trunc = Builder.CreateTrunc(ExtRem, RemTy); - - Rem->replaceAllUsesWith(Trunc); - Rem->dropAllReferences(); - Rem->eraseFromParent(); - - return expandRemainder(cast<BinaryOperator>(ExtRem)); -} - -/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the -/// above routines and extends the inputs/truncates the outputs to operate -/// in 32 bits; that is, these routines are good for targets that have no -/// or very little support for smaller than 32 bit integer arithmetic. -/// -/// Replace Div with emulation code. -bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { - assert((Div->getOpcode() == Instruction::SDiv || - Div->getOpcode() == Instruction::UDiv) && - "Trying to expand division from a non-division function"); - - Type *DivTy = Div->getType(); - assert(!DivTy->isVectorTy() && "Div over vectors not supported"); - - unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - - assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); - - if (DivTyBitWidth == 32) - return expandDivision(Div); - - // If bitwidth smaller than 32 extend inputs, extend output and proceed - // with 32 bit division. - IRBuilder<> Builder(Div); - - Value *ExtDividend; - Value *ExtDivisor; - Value *ExtDiv; - Value *Trunc; - Type *Int32Ty = Builder.getInt32Ty(); - - if (Div->getOpcode() == Instruction::SDiv) { - ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty); - ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty); - ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); - } else { - ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); - ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); - } - Trunc = Builder.CreateTrunc(ExtDiv, DivTy); - - Div->replaceAllUsesWith(Trunc); - Div->dropAllReferences(); - Div->eraseFromParent(); - - return expandDivision(cast<BinaryOperator>(ExtDiv)); -} - -/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the -/// above routines and extends the inputs/truncates the outputs to operate -/// in 64 bits. -/// -/// Replace Div with emulation code. -bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { - assert((Div->getOpcode() == Instruction::SDiv || - Div->getOpcode() == Instruction::UDiv) && - "Trying to expand division from a non-division function"); - - Type *DivTy = Div->getType(); - assert(!DivTy->isVectorTy() && "Div over vectors not supported"); - - unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - - assert(DivTyBitWidth <= 64 && - "Div of bitwidth greater than 64 not supported"); - - if (DivTyBitWidth == 64) - return expandDivision(Div); - - // If bitwidth smaller than 64 extend inputs, extend output and proceed - // with 64 bit division. 
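// Editorial illustration (not from the original source): for i32 operands of
// an sdiv, the code below emits
//   %lhs64 = sext i32 %lhs to i64
//   %rhs64 = sext i32 %rhs to i64
//   %div64 = sdiv i64 %lhs64, %rhs64
//   %res   = trunc i64 %div64 to i32
// and the new i64 sdiv is then expanded by expandDivision.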
- IRBuilder<> Builder(Div); - - Value *ExtDividend; - Value *ExtDivisor; - Value *ExtDiv; - Value *Trunc; - Type *Int64Ty = Builder.getInt64Ty(); - - if (Div->getOpcode() == Instruction::SDiv) { - ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); - ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); - ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); - } else { - ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); - ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); - } - Trunc = Builder.CreateTrunc(ExtDiv, DivTy); - - Div->replaceAllUsesWith(Trunc); - Div->dropAllReferences(); - Div->eraseFromParent(); - - return expandDivision(cast<BinaryOperator>(ExtDiv)); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp deleted file mode 100644 index 29e7c5260f46..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ /dev/null @@ -1,497 +0,0 @@ -//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass transforms loops by placing phi nodes at the end of the loops for -// all values that are live across the loop boundary. For example, it turns -// the left into the right code: -// -// for (...) for (...) -// if (c) if (c) -// X1 = ... X1 = ... -// else else -// X2 = ... X2 = ... -// X3 = phi(X1, X2) X3 = phi(X1, X2) -// ... = X3 + 4 X4 = phi(X3) -// ... = X4 + 4 -// -// This is still valid LLVM; the extra phi nodes are purely redundant, and will -// be trivially eliminated by InstCombine. The major benefit of this -// transformation is that it makes many other loop optimizations, such as -// LoopUnswitching, simpler. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LCSSA.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/PredIteratorCache.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -using namespace llvm; - -#define DEBUG_TYPE "lcssa" - -STATISTIC(NumLCSSA, "Number of live out of a loop variables"); - -#ifdef EXPENSIVE_CHECKS -static bool VerifyLoopLCSSA = true; -#else -static bool VerifyLoopLCSSA = false; -#endif -static cl::opt<bool, true> - VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), - cl::Hidden, - cl::desc("Verify loop lcssa form (time consuming)")); - -/// Return true if the specified block is in the list. 
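An editorial aside before the helpers that follow: a sketch of how a transform might use this file's worklist API from the caller's side. restoreLCSSAFor, Defs, and the scenario are hypothetical; formLCSSAForInstructions is the routine implemented below, which I believe is declared in llvm/Transforms/Utils/LoopUtils.h at this revision.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

// Hypothetical caller: after moving or cloning code so that loop-defined
// values gained users outside their loop, queue those definitions and let
// the worklist routine insert the needed .lcssa phis.
static bool restoreLCSSAFor(ArrayRef<Instruction *> Defs, DominatorTree &DT,
                            LoopInfo &LI) {
  SmallVector<Instruction *, 8> Worklist;
  for (Instruction *I : Defs)
    if (LI.getLoopFor(I->getParent())) // only values defined inside a loop
      Worklist.push_back(I);
  return !Worklist.empty() && formLCSSAForInstructions(Worklist, DT, LI);
}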
-static bool isExitBlock(BasicBlock *BB, - const SmallVectorImpl<BasicBlock *> &ExitBlocks) { - return is_contained(ExitBlocks, BB); -} - -/// For every instruction from the worklist, check to see if it has any uses -/// that are outside the current loop. If so, insert LCSSA PHI nodes and -/// rewrite the uses. -bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, - DominatorTree &DT, LoopInfo &LI) { - SmallVector<Use *, 16> UsesToRewrite; - SmallSetVector<PHINode *, 16> PHIsToRemove; - PredIteratorCache PredCache; - bool Changed = false; - - // Cache the Loop ExitBlocks across this loop. We expect to get a lot of - // instructions within the same loops, computing the exit blocks is - // expensive, and we're not mutating the loop structure. - SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks; - - while (!Worklist.empty()) { - UsesToRewrite.clear(); - - Instruction *I = Worklist.pop_back_val(); - assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); - BasicBlock *InstBB = I->getParent(); - Loop *L = LI.getLoopFor(InstBB); - assert(L && "Instruction belongs to a BB that's not part of a loop"); - if (!LoopExitBlocks.count(L)) - L->getExitBlocks(LoopExitBlocks[L]); - assert(LoopExitBlocks.count(L)); - const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L]; - - if (ExitBlocks.empty()) - continue; - - for (Use &U : I->uses()) { - Instruction *User = cast<Instruction>(U.getUser()); - BasicBlock *UserBB = User->getParent(); - if (auto *PN = dyn_cast<PHINode>(User)) - UserBB = PN->getIncomingBlock(U); - - if (InstBB != UserBB && !L->contains(UserBB)) - UsesToRewrite.push_back(&U); - } - - // If there are no uses outside the loop, exit with no change. - if (UsesToRewrite.empty()) - continue; - - ++NumLCSSA; // We are applying the transformation - - // Invoke instructions are special in that their result value is not - // available along their unwind edge. The code below tests to see whether - // DomBB dominates the value, so adjust DomBB to the normal destination - // block, which is effectively where the value is first usable. - BasicBlock *DomBB = InstBB; - if (auto *Inv = dyn_cast<InvokeInst>(I)) - DomBB = Inv->getNormalDest(); - - DomTreeNode *DomNode = DT.getNode(DomBB); - - SmallVector<PHINode *, 16> AddedPHIs; - SmallVector<PHINode *, 8> PostProcessPHIs; - - SmallVector<PHINode *, 4> InsertedPHIs; - SSAUpdater SSAUpdate(&InsertedPHIs); - SSAUpdate.Initialize(I->getType(), I->getName()); - - // Insert the LCSSA phi's into all of the exit blocks dominated by the - // value, and add them to the Phi's map. - for (BasicBlock *ExitBB : ExitBlocks) { - if (!DT.dominates(DomNode, DT.getNode(ExitBB))) - continue; - - // If we already inserted something for this BB, don't reprocess it. - if (SSAUpdate.HasValueForBlock(ExitBB)) - continue; - - PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), - I->getName() + ".lcssa", &ExitBB->front()); - // Get the debug location from the original instruction. - PN->setDebugLoc(I->getDebugLoc()); - // Add inputs from inside the loop for this PHI. - for (BasicBlock *Pred : PredCache.get(ExitBB)) { - PN->addIncoming(I, Pred); - - // If the exit block has a predecessor not within the loop, arrange for - // the incoming value use corresponding to that predecessor to be - // rewritten in terms of a different LCSSA PHI. 
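// Editorial illustration (not from the original source): for a value %v
// defined in the loop and an exit block with two in-loop predecessors, the
// node created above has the shape
//   %v.lcssa = phi i32 [ %v, %exiting1 ], [ %v, %exiting2 ]
// where every incoming value is the same in-loop definition.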
- if (!L->contains(Pred)) - UsesToRewrite.push_back( - &PN->getOperandUse(PN->getOperandNumForIncomingValue( - PN->getNumIncomingValues() - 1))); - } - - AddedPHIs.push_back(PN); - - // Remember that this phi makes the value alive in this block. - SSAUpdate.AddAvailableValue(ExitBB, PN); - - // LoopSimplify might fail to simplify some loops (e.g. when indirect - // branches are involved). In such situations, it might happen that an - // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we - // create PHIs in such an exit block, we are also inserting PHIs into L2's - // header. This could break LCSSA form for L2 because these inserted PHIs - // can also have uses outside of L2. Remember all PHIs in such situation - // as to revisit than later on. FIXME: Remove this if indirectbr support - // into LoopSimplify gets improved. - if (auto *OtherLoop = LI.getLoopFor(ExitBB)) - if (!L->contains(OtherLoop)) - PostProcessPHIs.push_back(PN); - } - - // Rewrite all uses outside the loop in terms of the new PHIs we just - // inserted. - for (Use *UseToRewrite : UsesToRewrite) { - // If this use is in an exit block, rewrite to use the newly inserted PHI. - // This is required for correctness because SSAUpdate doesn't handle uses - // in the same block. It assumes the PHI we inserted is at the end of the - // block. - Instruction *User = cast<Instruction>(UseToRewrite->getUser()); - BasicBlock *UserBB = User->getParent(); - if (auto *PN = dyn_cast<PHINode>(User)) - UserBB = PN->getIncomingBlock(*UseToRewrite); - - if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { - // Tell the VHs that the uses changed. This updates SCEV's caches. - if (UseToRewrite->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); - UseToRewrite->set(&UserBB->front()); - continue; - } - - // If we added a single PHI, it must dominate all uses and we can directly - // rename it. - if (AddedPHIs.size() == 1) { - // Tell the VHs that the uses changed. This updates SCEV's caches. - // We might call ValueIsRAUWd multiple times for the same value. - if (UseToRewrite->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]); - UseToRewrite->set(AddedPHIs[0]); - continue; - } - - // Otherwise, do full PHI insertion. - SSAUpdate.RewriteUse(*UseToRewrite); - } - - SmallVector<DbgValueInst *, 4> DbgValues; - llvm::findDbgValues(DbgValues, I); - - // Update pre-existing debug value uses that reside outside the loop. - auto &Ctx = I->getContext(); - for (auto DVI : DbgValues) { - BasicBlock *UserBB = DVI->getParent(); - if (InstBB == UserBB || L->contains(UserBB)) - continue; - // We currently only handle debug values residing in blocks that were - // traversed while rewriting the uses. If we inserted just a single PHI, - // we will handle all relevant debug values. - Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0] - : SSAUpdate.FindValueForBlock(UserBB); - if (V) - DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); - } - - // SSAUpdater might have inserted phi-nodes inside other loops. We'll need - // to post-process them to keep LCSSA form. - for (PHINode *InsertedPN : InsertedPHIs) { - if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent())) - if (!L->contains(OtherLoop)) - PostProcessPHIs.push_back(InsertedPN); - } - - // Post process PHI instructions that were inserted into another disjoint - // loop and update their exits properly. 
- for (auto *PostProcessPN : PostProcessPHIs) - if (!PostProcessPN->use_empty()) - Worklist.push_back(PostProcessPN); - - // Keep track of PHI nodes that we want to remove because they did not have - // any uses rewritten. If the new PHI is used, store it so that we can - // try to propagate dbg.value intrinsics to it. - SmallVector<PHINode *, 2> NeedDbgValues; - for (PHINode *PN : AddedPHIs) - if (PN->use_empty()) - PHIsToRemove.insert(PN); - else - NeedDbgValues.push_back(PN); - insertDebugValuesForPHIs(InstBB, NeedDbgValues); - Changed = true; - } - // Remove PHI nodes that did not have any uses rewritten. We need to redo the - // use_empty() check here, because even if the PHI node wasn't used when added - // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is - // not guaranteed to handle trees/cycles of PHI nodes that are only used by - // each other. Such situations have only been noticed when the input IR - // contains unreachable code, and leaving some extra redundant PHI nodes in - // such situations is considered a minor problem. - for (PHINode *PN : PHIsToRemove) - if (PN->use_empty()) - PN->eraseFromParent(); - return Changed; -} - -// Compute the set of BasicBlocks in the loop `L` dominating at least one exit. -static void computeBlocksDominatingExits( - Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks, - SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { - SmallVector<BasicBlock *, 8> BBWorklist; - - // We start from the exit blocks, as every block trivially dominates itself - // (not strictly). - for (BasicBlock *BB : ExitBlocks) - BBWorklist.push_back(BB); - - while (!BBWorklist.empty()) { - BasicBlock *BB = BBWorklist.pop_back_val(); - - // Check if this is a loop header. If this is the case, we're done. - if (L.getHeader() == BB) - continue; - - // Otherwise, add its immediate dominator to the worklist, unless we - // visited it already. - BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock(); - - // Exit blocks can have an immediate dominator not belonging to the - // loop. For an exit block to be immediately dominated by another block - // outside the loop, it implies that not all paths from that dominator to - // the exit block go through the loop. - // Example: - // - // |---- A - // | | - // | B<-- - // | | | - // |---> C -- - // | - // D - // - // C is the exit block of the loop and it's immediately dominated by A, - // which doesn't belong to the loop. - if (!L.contains(IDomBB)) - continue; - - if (BlocksDominatingExits.insert(IDomBB)) - BBWorklist.push_back(IDomBB); - } -} - -bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE) { - bool Changed = false; - -#ifdef EXPENSIVE_CHECKS - // Verify all sub-loops are in LCSSA form already. - for (Loop *SubLoop: L) - assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!"); -#endif - - SmallVector<BasicBlock *, 8> ExitBlocks; - L.getExitBlocks(ExitBlocks); - if (ExitBlocks.empty()) - return false; - - SmallSetVector<BasicBlock *, 8> BlocksDominatingExits; - - // We want to avoid use-scanning by leveraging dominance information. - // If a block doesn't dominate any of the loop exits, then none of the values - // defined in the loop can be used outside. - // We compute the set of blocks fulfilling this condition in advance by - // walking the dominator tree upwards until we hit a loop header.
- computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits); - - SmallVector<Instruction *, 8> Worklist; - - // Look at all the instructions in the loop, checking to see if they have uses - // outside the loop. If so, put them into the worklist to rewrite those uses. - for (BasicBlock *BB : BlocksDominatingExits) { - // Skip blocks that are part of any sub-loops, they must be in LCSSA - // already. - if (LI->getLoopFor(BB) != &L) - continue; - for (Instruction &I : *BB) { - // Reject two common cases fast: instructions with no uses (like stores) - // and instructions with one use that is in the same block as this. - if (I.use_empty() || - (I.hasOneUse() && I.user_back()->getParent() == BB && - !isa<PHINode>(I.user_back()))) - continue; - - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (I.getType()->isTokenTy()) - continue; - - Worklist.push_back(&I); - } - } - Changed = formLCSSAForInstructions(Worklist, DT, *LI); - - // If we modified the code, remove any caches about the loop from SCEV to - // avoid dangling entries. - // FIXME: This is a big hammer, can we clear the cache more selectively? - if (SE && Changed) - SE->forgetLoop(&L); - - assert(L.isLCSSAForm(DT)); - - return Changed; -} - -/// Process a loop nest depth first. -bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE) { - bool Changed = false; - - // Recurse depth-first through inner loops. - for (Loop *SubLoop : L.getSubLoops()) - Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); - - Changed |= formLCSSA(L, DT, LI, SE); - return Changed; -} - -/// Process all loops in the function, inner-most out. -static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT, - ScalarEvolution *SE) { - bool Changed = false; - for (auto &L : *LI) - Changed |= formLCSSARecursively(*L, DT, LI, SE); - return Changed; -} - -namespace { -struct LCSSAWrapperPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - LCSSAWrapperPass() : FunctionPass(ID) { - initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry()); - } - - // Cached analysis information for the current function. - DominatorTree *DT; - LoopInfo *LI; - ScalarEvolution *SE; - - bool runOnFunction(Function &F) override; - void verifyAnalysis() const override { - // This check is very expensive. On the loop intensive compiles it may cause - // up to 10x slowdown. Currently it's disabled by default. LPPassManager - // always does limited form of the LCSSA verification. Similar reasoning - // was used for the LoopInfo verifier. - if (VerifyLoopLCSSA) { - assert(all_of(*LI, - [&](Loop *L) { - return L->isRecursivelyLCSSAForm(*DT, *LI); - }) && - "LCSSA form is broken!"); - } - }; - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. It maintains both of these, - /// as well as the CFG. It also requires dominator information. 
- void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addPreserved<SCEVAAWrapperPass>(); - AU.addPreserved<BranchProbabilityInfoWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - - // This is needed to perform LCSSA verification inside LPPassManager. - AU.addRequired<LCSSAVerificationPass>(); - AU.addPreserved<LCSSAVerificationPass>(); - } -}; -} - -char LCSSAWrapperPass::ID = 0; -INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass) -INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", - false, false) - -Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); } -char &llvm::LCSSAID = LCSSAWrapperPass::ID; - -/// Transform \p F into loop-closed SSA form. -bool LCSSAWrapperPass::runOnFunction(Function &F) { - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); - SE = SEWP ? &SEWP->getSE() : nullptr; - - return formLCSSAOnAllLoops(LI, *DT, SE); -} - -PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &LI = AM.getResult<LoopAnalysis>(F); - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); - if (!formLCSSAOnAllLoops(&LI, DT, SE)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); - PA.preserve<ScalarEvolutionAnalysis>(); - // BPI maps terminators to probabilities; since we don't modify the CFG, no - // updates are needed to preserve it. - PA.preserve<BranchProbabilityAnalysis>(); - PA.preserve<MemorySSAAnalysis>(); - return PA; -} diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp deleted file mode 100644 index 8c67d1dc6eb3..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ /dev/null @@ -1,561 +0,0 @@ -//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass shrink-wraps a call to a function if the result is not used. -// The call can set errno but is otherwise side-effect free. For example: -// sqrt(val); -// is transformed to -// if (val < 0) -// sqrt(val); -// Even if the result of the library call is not being used, the compiler cannot -// safely delete the call because the function can set errno on error -// conditions. -// Note in many functions, the error condition solely depends on the incoming -// parameter. In this optimization, we generate the condition that leads to -// setting errno and use it to shrink-wrap the call.
Since the chances of hitting the error -// condition is low, the runtime call is effectively eliminated. -// -// These partially dead calls are usually results of C++ abstraction penalty -// exposed by inlining. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -using namespace llvm; - -#define DEBUG_TYPE "libcalls-shrinkwrap" - -STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); -STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); - -namespace { -class LibCallsShrinkWrapLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) { - initializeLibCallsShrinkWrapLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; -}; -} - -char LibCallsShrinkWrapLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", - "Conditionally eliminate dead library calls", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", - "Conditionally eliminate dead library calls", false, false) - -namespace { -class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { -public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) - : TLI(TLI), DT(DT){}; - void visitCallInst(CallInst &CI) { checkCandidate(CI); } - bool perform() { - bool Changed = false; - for (auto &CI : WorkList) { - LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() - << "\n"); - if (perform(CI)) { - Changed = true; - LLVM_DEBUG(dbgs() << "Transformed\n"); - } - } - return Changed; - } - -private: - bool perform(CallInst *CI); - void checkCandidate(CallInst &CI); - void shrinkWrapCI(CallInst *CI, Value *Cond); - bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func); - bool performCallErrors(CallInst *CI, const LibFunc &Func); - bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func); - Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func); - Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func); - Value *generateCondForPow(CallInst *CI, const LibFunc &Func); - - // Create an OR of two conditions. - Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val, - CmpInst::Predicate Cmp2, float Val2) { - IRBuilder<> BBBuilder(CI); - Value *Arg = CI->getArgOperand(0); - auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2); - auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val); - return BBBuilder.CreateOr(Cond1, Cond2); - } - - // Create a single condition using IRBuilder. 
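  // Editorial illustration (not from the original source) of what the
  // condition helpers above and below emit for acosf, whose domain error
  // is (x < -1 || x > 1):
  //   %c2 = fcmp ogt float %x, 1.0
  //   %c1 = fcmp olt float %x, -1.0
  //   %cond = or i1 %c1, %c2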
- Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp, - float Val) { - Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val)); - if (!Arg->getType()->isFloatTy()) - V = ConstantExpr::getFPExtend(V, Arg->getType()); - return BBBuilder.CreateFCmp(Cmp, Arg, V); - } - - // Create a single condition. - Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) { - IRBuilder<> BBBuilder(CI); - Value *Arg = CI->getArgOperand(0); - return createCond(BBBuilder, Arg, Cmp, Val); - } - - const TargetLibraryInfo &TLI; - DominatorTree *DT; - SmallVector<CallInst *, 16> WorkList; -}; -} // end anonymous namespace - -// Perform the transformation to calls with errno set by domain error. -bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI, - const LibFunc &Func) { - Value *Cond = nullptr; - - switch (Func) { - case LibFunc_acos: // DomainError: (x < -1 || x > 1) - case LibFunc_acosf: // Same as acos - case LibFunc_acosl: // Same as acos - case LibFunc_asin: // DomainError: (x < -1 || x > 1) - case LibFunc_asinf: // Same as asin - case LibFunc_asinl: // Same as asin - { - ++NumWrappedTwoCond; - Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f); - break; - } - case LibFunc_cos: // DomainError: (x == +inf || x == -inf) - case LibFunc_cosf: // Same as cos - case LibFunc_cosl: // Same as cos - case LibFunc_sin: // DomainError: (x == +inf || x == -inf) - case LibFunc_sinf: // Same as sin - case LibFunc_sinl: // Same as sin - { - ++NumWrappedTwoCond; - Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ, - -INFINITY); - break; - } - case LibFunc_acosh: // DomainError: (x < 1) - case LibFunc_acoshf: // Same as acosh - case LibFunc_acoshl: // Same as acosh - { - ++NumWrappedOneCond; - Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f); - break; - } - case LibFunc_sqrt: // DomainError: (x < 0) - case LibFunc_sqrtf: // Same as sqrt - case LibFunc_sqrtl: // Same as sqrt - { - ++NumWrappedOneCond; - Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f); - break; - } - default: - return false; - } - shrinkWrapCI(CI, Cond); - return true; -} - -// Perform the transformation to calls with errno set by range error. -bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI, - const LibFunc &Func) { - Value *Cond = nullptr; - - switch (Func) { - case LibFunc_cosh: - case LibFunc_coshf: - case LibFunc_coshl: - case LibFunc_exp: - case LibFunc_expf: - case LibFunc_expl: - case LibFunc_exp10: - case LibFunc_exp10f: - case LibFunc_exp10l: - case LibFunc_exp2: - case LibFunc_exp2f: - case LibFunc_exp2l: - case LibFunc_sinh: - case LibFunc_sinhf: - case LibFunc_sinhl: { - Cond = generateTwoRangeCond(CI, Func); - break; - } - case LibFunc_expm1: // RangeError: (709, inf) - case LibFunc_expm1f: // RangeError: (88, inf) - case LibFunc_expm1l: // RangeError: (11356, inf) - { - Cond = generateOneRangeCond(CI, Func); - break; - } - default: - return false; - } - shrinkWrapCI(CI, Cond); - return true; -} - -// Perform the transformation to calls with errno set by combination of errors. 
-bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, - const LibFunc &Func) { - Value *Cond = nullptr; - - switch (Func) { - case LibFunc_atanh: // DomainError: (x < -1 || x > 1) - // PoleError: (x == -1 || x == 1) - // Overall Cond: (x <= -1 || x >= 1) - case LibFunc_atanhf: // Same as atanh - case LibFunc_atanhl: // Same as atanh - { - ++NumWrappedTwoCond; - Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); - break; - } - case LibFunc_log: // DomainError: (x < 0) - // PoleError: (x == 0) - // Overall Cond: (x <= 0) - case LibFunc_logf: // Same as log - case LibFunc_logl: // Same as log - case LibFunc_log10: // Same as log - case LibFunc_log10f: // Same as log - case LibFunc_log10l: // Same as log - case LibFunc_log2: // Same as log - case LibFunc_log2f: // Same as log - case LibFunc_log2l: // Same as log - case LibFunc_logb: // Same as log - case LibFunc_logbf: // Same as log - case LibFunc_logbl: // Same as log - { - ++NumWrappedOneCond; - Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); - break; - } - case LibFunc_log1p: // DomainError: (x < -1) - // PoleError: (x == -1) - // Overall Cond: (x <= -1) - case LibFunc_log1pf: // Same as log1p - case LibFunc_log1pl: // Same as log1p - { - ++NumWrappedOneCond; - Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); - break; - } - case LibFunc_pow: // DomainError: x < 0 and y is noninteger - // PoleError: x == 0 and y < 0 - // RangeError: overflow or underflow - case LibFunc_powf: - case LibFunc_powl: { - Cond = generateCondForPow(CI, Func); - if (Cond == nullptr) - return false; - break; - } - default: - return false; - } - assert(Cond && "performCallErrors should not see an empty condition"); - shrinkWrapCI(CI, Cond); - return true; -} - -// Checks if CI is a candidate for shrinkwrapping and put it into work list if -// true. -void LibCallsShrinkWrap::checkCandidate(CallInst &CI) { - if (CI.isNoBuiltin()) - return; - // A possible improvement is to handle the calls with the return value being - // used. If there is API for fast libcall implementation without setting - // errno, we can use the same framework to direct/wrap the call to the fast - // API in the error free path, and leave the original call in the slow path. - if (!CI.use_empty()) - return; - - LibFunc Func; - Function *Callee = CI.getCalledFunction(); - if (!Callee) - return; - if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func)) - return; - - if (CI.getNumArgOperands() == 0) - return; - // TODO: Handle long double in other formats. - Type *ArgType = CI.getArgOperand(0)->getType(); - if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() || - ArgType->isX86_FP80Ty())) - return; - - WorkList.push_back(&CI); -} - -// Generate the upper bound condition for RangeError. -Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, - const LibFunc &Func) { - float UpperBound; - switch (Func) { - case LibFunc_expm1: // RangeError: (709, inf) - UpperBound = 709.0f; - break; - case LibFunc_expm1f: // RangeError: (88, inf) - UpperBound = 88.0f; - break; - case LibFunc_expm1l: // RangeError: (11356, inf) - UpperBound = 11356.0f; - break; - default: - llvm_unreachable("Unhandled library call!"); - } - - ++NumWrappedOneCond; - return createCond(CI, CmpInst::FCMP_OGT, UpperBound); -} - -// Generate the lower and upper bound condition for RangeError. 
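  // Editorial illustration (not from the original source): for a dead call
  //   expf(x);
  // the bounds below are (-103, 88), so the pass effectively produces
  //   if (x > 88.0f || x < -103.0f)
  //     expf(x);   // kept only for its errno side effect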
-Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, - const LibFunc &Func) { - float UpperBound, LowerBound; - switch (Func) { - case LibFunc_cosh: // RangeError: (x < -710 || x > 710) - case LibFunc_sinh: // Same as cosh - LowerBound = -710.0f; - UpperBound = 710.0f; - break; - case LibFunc_coshf: // RangeError: (x < -89 || x > 89) - case LibFunc_sinhf: // Same as coshf - LowerBound = -89.0f; - UpperBound = 89.0f; - break; - case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357) - case LibFunc_sinhl: // Same as coshl - LowerBound = -11357.0f; - UpperBound = 11357.0f; - break; - case LibFunc_exp: // RangeError: (x < -745 || x > 709) - LowerBound = -745.0f; - UpperBound = 709.0f; - break; - case LibFunc_expf: // RangeError: (x < -103 || x > 88) - LowerBound = -103.0f; - UpperBound = 88.0f; - break; - case LibFunc_expl: // RangeError: (x < -11399 || x > 11356) - LowerBound = -11399.0f; - UpperBound = 11356.0f; - break; - case LibFunc_exp10: // RangeError: (x < -323 || x > 308) - LowerBound = -323.0f; - UpperBound = 308.0f; - break; - case LibFunc_exp10f: // RangeError: (x < -45 || x > 38) - LowerBound = -45.0f; - UpperBound = 38.0f; - break; - case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932) - LowerBound = -4950.0f; - UpperBound = 4932.0f; - break; - case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023) - LowerBound = -1074.0f; - UpperBound = 1023.0f; - break; - case LibFunc_exp2f: // RangeError: (x < -149 || x > 127) - LowerBound = -149.0f; - UpperBound = 127.0f; - break; - case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383) - LowerBound = -16445.0f; - UpperBound = 11383.0f; - break; - default: - llvm_unreachable("Unhandled library call!"); - } - - ++NumWrappedTwoCond; - return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT, - LowerBound); -} - -// For pow(x,y), We only handle the following cases: -// (1) x is a constant && (x >= 1) && (x < MaxUInt8) -// Cond is: (y > 127) -// (2) x is a value coming from an integer type. -// (2.1) if x's bit_size == 8 -// Cond: (x <= 0 || y > 128) -// (2.2) if x's bit_size is 16 -// Cond: (x <= 0 || y > 64) -// (2.3) if x's bit_size is 32 -// Cond: (x <= 0 || y > 32) -// Support for powl(x,y) and powf(x,y) are TBD. -// -// Note that condition can be more conservative than the actual condition -// (i.e. we might invoke the calls that will not set the errno.). -// -Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, - const LibFunc &Func) { - // FIXME: LibFunc_powf and powl TBD. - if (Func != LibFunc_pow) { - LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n"); - return nullptr; - } - - Value *Base = CI->getArgOperand(0); - Value *Exp = CI->getArgOperand(1); - IRBuilder<> BBBuilder(CI); - - // Constant Base case. - if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) { - double D = CF->getValueAPF().convertToDouble(); - if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) { - LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); - return nullptr; - } - - ++NumWrappedOneCond; - Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f)); - if (!Exp->getType()->isFloatTy()) - V = ConstantExpr::getFPExtend(V, Exp->getType()); - return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); - } - - // If the Base value coming from an integer type. 
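  // Editorial illustration (not from the original source): if the base was
  // produced by sitofp/uitofp from an i16 (bit width 16, so bound 64.0 in the
  // code below), the emitted guard is
  //   %c1 = fcmp ogt double %y, 64.0
  //   %c0 = fcmp ole double %x, 0.0
  //   %cond = or i1 %c0, %c1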
- Instruction *I = dyn_cast<Instruction>(Base); - if (!I) { - LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n"); - return nullptr; - } - unsigned Opcode = I->getOpcode(); - if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) { - unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); - float UpperV = 0.0f; - if (BW == 8) - UpperV = 128.0f; - else if (BW == 16) - UpperV = 64.0f; - else if (BW == 32) - UpperV = 32.0f; - else { - LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n"); - return nullptr; - } - - ++NumWrappedTwoCond; - Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV)); - Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f)); - if (!Exp->getType()->isFloatTy()) - V = ConstantExpr::getFPExtend(V, Exp->getType()); - if (!Base->getType()->isFloatTy()) - V0 = ConstantExpr::getFPExtend(V0, Exp->getType()); - - Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V); - Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0); - return BBBuilder.CreateOr(Cond0, Cond); - } - LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); - return nullptr; -} - -// Wrap conditions that can potentially generate errno to the library call. -void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); - MDNode *BranchWeights = - MDBuilder(CI->getContext()).createBranchWeights(1, 2000); - - Instruction *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); - BasicBlock *CallBB = NewInst->getParent(); - CallBB->setName("cdce.call"); - BasicBlock *SuccBB = CallBB->getSingleSuccessor(); - assert(SuccBB && "The split block should have a single successor"); - SuccBB->setName("cdce.end"); - CI->removeFromParent(); - CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); - LLVM_DEBUG(dbgs() << "== Basic Block After =="); - LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB - << *CallBB->getSingleSuccessor() << "\n"); -} - -// Perform the transformation to a single candidate. -bool LibCallsShrinkWrap::perform(CallInst *CI) { - LibFunc Func; - Function *Callee = CI->getCalledFunction(); - assert(Callee && "perform() should apply to a non-empty callee"); - TLI.getLibFunc(*Callee, Func); - assert(Func && "perform() is not expecting an empty function"); - - if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) - return true; - return performCallErrors(CI, Func); -} - -void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); -} - -static bool runImpl(Function &F, const TargetLibraryInfo &TLI, - DominatorTree *DT) { - if (F.hasFnAttribute(Attribute::OptimizeForSize)) - return false; - LibCallsShrinkWrap CCDCE(TLI, DT); - CCDCE.visit(F); - bool Changed = CCDCE.perform(); - -// Verify the dominator after we've updated it locally. - assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast)); - return Changed; -} - -bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - return runImpl(F, TLI, DT); -} - -namespace llvm { -char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID; - -// Public interface to LibCallsShrinkWrap pass. -FunctionPass *createLibCallsShrinkWrapPass() { - return new LibCallsShrinkWrapLegacyPass(); -} - -PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, - FunctionAnalysisManager &FAM) { - auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - if (!runImpl(F, TLI, DT)) - return PreservedAnalyses::all(); - auto PA = PreservedAnalyses(); - PA.preserve<GlobalsAA>(); - PA.preserve<DominatorTreeAnalysis>(); - return PA; -} -} diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp deleted file mode 100644 index 39b6b889f91c..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ /dev/null @@ -1,2956 +0,0 @@ -//===- Local.cpp - Functions to perform local transformations -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This family of functions perform various local transformations to the -// program. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/ConstantRange.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalObject.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" -#include 
"llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/KnownBits.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <climits> -#include <cstdint> -#include <iterator> -#include <map> -#include <utility> - -using namespace llvm; -using namespace llvm::PatternMatch; - -#define DEBUG_TYPE "local" - -STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); - -// Max recursion depth for collectBitParts used when detecting bswap and -// bitreverse idioms -static const unsigned BitPartRecursionMaxDepth = 64; - -//===----------------------------------------------------------------------===// -// Local constant propagation. -// - -/// ConstantFoldTerminator - If a terminator instruction is predicated on a -/// constant value, convert it into an unconditional branch to the constant -/// destination. This is a nontrivial operation because the successors of this -/// basic block must have their PHI nodes updated. -/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch -/// conditions and indirectbr addresses this might make dead if -/// DeleteDeadConditions is true. -bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, - const TargetLibraryInfo *TLI, - DomTreeUpdater *DTU) { - Instruction *T = BB->getTerminator(); - IRBuilder<> Builder(T); - - // Branch - See if we are conditional jumping on constant - if (auto *BI = dyn_cast<BranchInst>(T)) { - if (BI->isUnconditional()) return false; // Can't optimize uncond branch - BasicBlock *Dest1 = BI->getSuccessor(0); - BasicBlock *Dest2 = BI->getSuccessor(1); - - if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { - // Are we branching on constant? - // YES. Change to unconditional branch... - BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; - BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; - - // Let the basic block know that we are letting go of it. Based on this, - // it will adjust it's PHI nodes. - OldDest->removePredecessor(BB); - - // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Destination); - BI->eraseFromParent(); - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}}); - return true; - } - - if (Dest2 == Dest1) { // Conditional branch to same location? - // This branch matches something like this: - // br bool %cond, label %Dest, label %Dest - // and changes it into: br label %Dest - - // Let the basic block know that we are letting go of one copy of it. - assert(BI->getParent() && "Terminator not inserted in block!"); - Dest1->removePredecessor(BI->getParent()); - - // Replace the conditional branch with an unconditional one. - Builder.CreateBr(Dest1); - Value *Cond = BI->getCondition(); - BI->eraseFromParent(); - if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); - return true; - } - return false; - } - - if (auto *SI = dyn_cast<SwitchInst>(T)) { - // If we are switching on a constant, we can convert the switch to an - // unconditional branch. - auto *CI = dyn_cast<ConstantInt>(SI->getCondition()); - BasicBlock *DefaultDest = SI->getDefaultDest(); - BasicBlock *TheOnlyDest = DefaultDest; - - // If the default is unreachable, ignore it when searching for TheOnlyDest. 
-    if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
-        SI->getNumCases() > 0) {
-      TheOnlyDest = SI->case_begin()->getCaseSuccessor();
-    }
-
-    // Figure out which case it goes to.
-    for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
-      // Found case matching a constant operand?
-      if (i->getCaseValue() == CI) {
-        TheOnlyDest = i->getCaseSuccessor();
-        break;
-      }
-
-      // Check to see if this branch is going to the same place as the default
-      // dest. If so, eliminate it as an explicit compare.
-      if (i->getCaseSuccessor() == DefaultDest) {
-        MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
-        unsigned NCases = SI->getNumCases();
-        // Fold the case metadata into the default if there will be any branches
-        // left, unless the metadata doesn't match the switch.
-        if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) {
-          // Collect branch weights into a vector.
-          SmallVector<uint32_t, 8> Weights;
-          for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
-               ++MD_i) {
-            auto *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
-            Weights.push_back(CI->getValue().getZExtValue());
-          }
-          // Merge weight of this case to the default weight.
-          unsigned idx = i->getCaseIndex();
-          Weights[0] += Weights[idx+1];
-          // Remove weight for this case.
-          std::swap(Weights[idx+1], Weights.back());
-          Weights.pop_back();
-          SI->setMetadata(LLVMContext::MD_prof,
-                          MDBuilder(BB->getContext()).
-                          createBranchWeights(Weights));
-        }
-        // Remove this entry.
-        BasicBlock *ParentBB = SI->getParent();
-        DefaultDest->removePredecessor(ParentBB);
-        i = SI->removeCase(i);
-        e = SI->case_end();
-        if (DTU)
-          DTU->applyUpdatesPermissive(
-              {{DominatorTree::Delete, ParentBB, DefaultDest}});
-        continue;
-      }
-
-      // Otherwise, check to see if the switch only branches to one destination.
-      // We do this by resetting "TheOnlyDest" to null when we find two non-equal
-      // destinations.
-      if (i->getCaseSuccessor() != TheOnlyDest)
-        TheOnlyDest = nullptr;
-
-      // Increment this iterator as we haven't removed the case.
-      ++i;
-    }
-
-    if (CI && !TheOnlyDest) {
-      // Branching on a constant, but not to any of the cases; go to the
-      // default successor.
-      TheOnlyDest = SI->getDefaultDest();
-    }
-
-    // If we found a single destination that we can fold the switch into, do so
-    // now.
-    if (TheOnlyDest) {
-      // Insert the new branch.
-      Builder.CreateBr(TheOnlyDest);
-      BasicBlock *BB = SI->getParent();
-      std::vector<DominatorTree::UpdateType> Updates;
-      if (DTU)
-        Updates.reserve(SI->getNumSuccessors() - 1);
-
-      // Remove entries from PHI nodes which we no longer branch to...
-      for (BasicBlock *Succ : successors(SI)) {
-        // Is this the one destination we are keeping?
-        if (Succ == TheOnlyDest) {
-          TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
-        } else {
-          Succ->removePredecessor(BB);
-          if (DTU)
-            Updates.push_back({DominatorTree::Delete, BB, Succ});
-        }
-      }
-
-      // Delete the old switch.
-      Value *Cond = SI->getCondition();
-      SI->eraseFromParent();
-      if (DeleteDeadConditions)
-        RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
-      if (DTU)
-        DTU->applyUpdatesPermissive(Updates);
-      return true;
-    }
-
-    if (SI->getNumCases() == 1) {
-      // Otherwise, we can fold this switch into a conditional branch
-      // instruction if it has only one non-default destination.
-      auto FirstCase = *SI->case_begin();
-      Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
-                                         FirstCase.getCaseValue(), "cond");
-
-      // Insert the new branch.
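-      // (Editor's illustration, not in the original source) For example:
-      //   switch i32 %x, label %default [ i32 7, label %case7 ]
-      // becomes:
-      //   %cond = icmp eq i32 %x, 7
-      //   br i1 %cond, label %case7, label %default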
- BranchInst *NewBr = Builder.CreateCondBr(Cond, - FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); - MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); - if (MD && MD->getNumOperands() == 3) { - ConstantInt *SICase = - mdconst::dyn_extract<ConstantInt>(MD->getOperand(2)); - ConstantInt *SIDef = - mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); - assert(SICase && SIDef); - // The TrueWeight should be the weight for the single case of SI. - NewBr->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(SICase->getValue().getZExtValue(), - SIDef->getValue().getZExtValue())); - } - - // Update make.implicit metadata to the newly-created conditional branch. - MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); - if (MakeImplicitMD) - NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); - - // Delete the old switch. - SI->eraseFromParent(); - return true; - } - return false; - } - - if (auto *IBI = dyn_cast<IndirectBrInst>(T)) { - // indirectbr blockaddress(@F, @BB) -> br label @BB - if (auto *BA = - dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { - BasicBlock *TheOnlyDest = BA->getBasicBlock(); - std::vector <DominatorTree::UpdateType> Updates; - if (DTU) - Updates.reserve(IBI->getNumDestinations() - 1); - - // Insert the new branch. - Builder.CreateBr(TheOnlyDest); - - for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - if (IBI->getDestination(i) == TheOnlyDest) { - TheOnlyDest = nullptr; - } else { - BasicBlock *ParentBB = IBI->getParent(); - BasicBlock *DestBB = IBI->getDestination(i); - DestBB->removePredecessor(ParentBB); - if (DTU) - Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); - } - } - Value *Address = IBI->getAddress(); - IBI->eraseFromParent(); - if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); - - // If we didn't find our destination in the IBI successor list, then we - // have undefined behavior. Replace the unconditional branch with an - // 'unreachable' instruction. - if (TheOnlyDest) { - BB->getTerminator()->eraseFromParent(); - new UnreachableInst(BB->getContext(), BB); - } - - if (DTU) - DTU->applyUpdatesPermissive(Updates); - return true; - } - } - - return false; -} - -//===----------------------------------------------------------------------===// -// Local dead code elimination. -// - -/// isInstructionTriviallyDead - Return true if the result produced by the -/// instruction is not used, and the instruction has no side effects. -/// -bool llvm::isInstructionTriviallyDead(Instruction *I, - const TargetLibraryInfo *TLI) { - if (!I->use_empty()) - return false; - return wouldInstructionBeTriviallyDead(I, TLI); -} - -bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, - const TargetLibraryInfo *TLI) { - if (I->isTerminator()) - return false; - - // We don't want the landingpad-like instructions removed by anything this - // general. - if (I->isEHPad()) - return false; - - // We don't want debug info removed by anything this general, unless - // debug info is empty. 
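-  // (Editor's note, not in the original source: "empty" here means the
-  // intrinsic no longer describes anything, e.g. a dbg.declare whose address
-  // operand has been nulled out, which is exactly what the checks below test.)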
-  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
-    if (DDI->getAddress())
-      return false;
-    return true;
-  }
-  if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
-    if (DVI->getValue())
-      return false;
-    return true;
-  }
-  if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
-    if (DLI->getLabel())
-      return false;
-    return true;
-  }
-
-  if (!I->mayHaveSideEffects())
-    return true;
-
-  // Special case intrinsics that "may have side effects" but can be deleted
-  // when dead.
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-    // Safe to delete llvm.stacksave and launder.invariant.group if dead.
-    if (II->getIntrinsicID() == Intrinsic::stacksave ||
-        II->getIntrinsicID() == Intrinsic::launder_invariant_group)
-      return true;
-
-    // Lifetime intrinsics are dead when their right-hand operand is undef.
-    if (II->isLifetimeStartOrEnd())
-      return isa<UndefValue>(II->getArgOperand(1));
-
-    // Assumptions are dead if their condition is trivially true. Guards on
-    // true are operationally no-ops. In the future we can consider more
-    // sophisticated tradeoffs for guards considering potential for check
-    // widening, but for now we keep things simple.
-    if (II->getIntrinsicID() == Intrinsic::assume ||
-        II->getIntrinsicID() == Intrinsic::experimental_guard) {
-      if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
-        return !Cond->isZero();
-
-      return false;
-    }
-  }
-
-  if (isAllocLikeFn(I, TLI))
-    return true;
-
-  if (CallInst *CI = isFreeCall(I, TLI))
-    if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
-      return C->isNullValue() || isa<UndefValue>(C);
-
-  if (auto *Call = dyn_cast<CallBase>(I))
-    if (isMathLibCallNoop(Call, TLI))
-      return true;
-
-  return false;
-}
-
-/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
-/// trivially dead instruction, delete it. If that makes any of its operands
-/// trivially dead, delete them too, recursively. Return true if any
-/// instructions were deleted.
-bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
-    Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I || !isInstructionTriviallyDead(I, TLI))
-    return false;
-
-  SmallVector<Instruction*, 16> DeadInsts;
-  DeadInsts.push_back(I);
-  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
-
-  return true;
-}
-
-void llvm::RecursivelyDeleteTriviallyDeadInstructions(
-    SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
-    MemorySSAUpdater *MSSAU) {
-  // Process the dead instruction list until empty.
-  while (!DeadInsts.empty()) {
-    Instruction &I = *DeadInsts.pop_back_val();
-    assert(I.use_empty() && "Instructions with uses are not dead.");
-    assert(isInstructionTriviallyDead(&I, TLI) &&
-           "Live instruction found in dead worklist!");
-
-    // Don't lose the debug info while deleting the instructions.
-    salvageDebugInfo(I);
-
-    // Null out all of the instruction's operands to see if any operand becomes
-    // dead as we go.
-    for (Use &OpU : I.operands()) {
-      Value *OpV = OpU.get();
-      OpU.set(nullptr);
-
-      if (!OpV->use_empty())
-        continue;
-
-      // If the operand is an instruction that became dead as we nulled out the
-      // operand, and if it is 'trivially' dead, delete it in a future loop
-      // iteration.
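-      // (Editor's illustration, not in the original source: deleting
-      //   %sum = add i32 %a, %b
-      // may leave %a and %b without uses; they are queued here and erased on
-      // a later pass over the worklist.)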
- if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI, TLI)) - DeadInsts.push_back(OpI); - } - if (MSSAU) - MSSAU->removeMemoryAccess(&I); - - I.eraseFromParent(); - } -} - -bool llvm::replaceDbgUsesWithUndef(Instruction *I) { - SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, I); - for (auto *DII : DbgUsers) { - Value *Undef = UndefValue::get(I->getType()); - DII->setOperand(0, MetadataAsValue::get(DII->getContext(), - ValueAsMetadata::get(Undef))); - } - return !DbgUsers.empty(); -} - -/// areAllUsesEqual - Check whether the uses of a value are all the same. -/// This is similar to Instruction::hasOneUse() except this will also return -/// true when there are no uses or multiple uses that all refer to the same -/// value. -static bool areAllUsesEqual(Instruction *I) { - Value::user_iterator UI = I->user_begin(); - Value::user_iterator UE = I->user_end(); - if (UI == UE) - return true; - - User *TheUse = *UI; - for (++UI; UI != UE; ++UI) { - if (*UI != TheUse) - return false; - } - return true; -} - -/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively -/// dead PHI node, due to being a def-use chain of single-use nodes that -/// either forms a cycle or is terminated by a trivially dead instruction, -/// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. Return true if a change was made. -bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, - const TargetLibraryInfo *TLI) { - SmallPtrSet<Instruction*, 4> Visited; - for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); - I = cast<Instruction>(*I->user_begin())) { - if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); - - // If we find an instruction more than once, we're on a cycle that - // won't prove fruitful. - if (!Visited.insert(I).second) { - // Break the cycle and delete the instruction and its operands. - I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); - return true; - } - } - return false; -} - -static bool -simplifyAndDCEInstruction(Instruction *I, - SmallSetVector<Instruction *, 16> &WorkList, - const DataLayout &DL, - const TargetLibraryInfo *TLI) { - if (isInstructionTriviallyDead(I, TLI)) { - salvageDebugInfo(*I); - - // Null out all of the instruction's operands to see if any operand becomes - // dead as we go. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *OpV = I->getOperand(i); - I->setOperand(i, nullptr); - - if (!OpV->use_empty() || I == OpV) - continue; - - // If the operand is an instruction that became dead as we nulled out the - // operand, and if it is 'trivially' dead, delete it in a future loop - // iteration. - if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI, TLI)) - WorkList.insert(OpI); - } - - I->eraseFromParent(); - - return true; - } - - if (Value *SimpleV = SimplifyInstruction(I, DL)) { - // Add the users to the worklist. CAREFUL: an instruction can use itself, - // in the case of a phi node. - for (User *U : I->users()) { - if (U != I) { - WorkList.insert(cast<Instruction>(U)); - } - } - - // Replace the instruction with its simplified value. 
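-    // (Editor's illustration, not in the original source: e.g.
-    //   %y = or i32 %x, 0
-    // simplifies to %x, so %y's users are rewritten to use %x below and %y
-    // usually becomes trivially dead.)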
-  bool Changed = false;
-  if (!I->use_empty()) {
-    I->replaceAllUsesWith(SimpleV);
-    Changed = true;
-  }
-  if (isInstructionTriviallyDead(I, TLI)) {
-    I->eraseFromParent();
-    Changed = true;
-  }
-  return Changed;
-}
-
-/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
-/// simplify any instructions in it and recursively delete dead instructions.
-///
-/// This returns true if it changed the code. Note that it can delete
-/// instructions in other blocks as well as in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
-                                       const TargetLibraryInfo *TLI) {
-  bool MadeChange = false;
-  const DataLayout &DL = BB->getModule()->getDataLayout();
-
-#ifndef NDEBUG
-  // In debug builds, ensure that the terminator of the block is never replaced
-  // or deleted by these simplifications. The idea of simplification is that it
-  // cannot introduce new instructions, and there is no way to replace the
-  // terminator of a block without introducing a new instruction.
-  AssertingVH<Instruction> TerminatorVH(&BB->back());
-#endif
-
-  SmallSetVector<Instruction *, 16> WorkList;
-  // Iterate over the original function, only adding insts to the worklist
-  // if they actually need to be revisited. This avoids having to pre-init
-  // the worklist with the entire function's worth of instructions.
-  for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
-       BI != E;) {
-    assert(!BI->isTerminator());
-    Instruction *I = &*BI;
-    ++BI;
-
-    // We're visiting this instruction now, so make sure it's not in the
-    // worklist from an earlier visit.
-    if (!WorkList.count(I))
-      MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
-  }
-
-  while (!WorkList.empty()) {
-    Instruction *I = WorkList.pop_back_val();
-    MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
-  }
-  return MadeChange;
-}
-
-//===----------------------------------------------------------------------===//
-//  Control Flow Graph Restructuring.
-//
-
-/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
-/// method is called when we're about to delete Pred as a predecessor of BB. If
-/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
-///
-/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
-/// nodes that collapse into identity values. For example, if we have:
-///   x = phi(1, 0, 0, 0)
-///   y = and x, z
-///
-/// ... and delete the predecessor corresponding to the '1', this will attempt
-/// to recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
-                                        DomTreeUpdater *DTU) {
-  // This only adjusts blocks with PHI nodes.
-  if (!isa<PHINode>(BB->begin()))
-    return;
-
-  // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
-  // them down. This will leave us with single entry phi nodes and other phis
-  // that can be removed.
-  BB->removePredecessor(Pred, true);
-
-  WeakTrackingVH PhiIt = &BB->front();
-  while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
-    PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
-    Value *OldPhiIt = PhiIt;
-
-    if (!recursivelySimplifyInstruction(PN))
-      continue;
-
-    // If recursive simplification ended up deleting the next PHI node we would
-    // iterate to, then our iterator is invalid; restart scanning from the top
-    // of the block.
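-    // (Editor's note, not in the original source: PhiIt is a WeakTrackingVH,
-    // so if simplification deleted or replaced the next PHI, the handle no
-    // longer compares equal to the cached OldPhiIt and the rescan triggers.)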
- if (PhiIt != OldPhiIt) PhiIt = &BB->front(); - } - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}}); -} - -/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its -/// predecessor is known to have one successor (DestBB!). Eliminate the edge -/// between them, moving the instructions in the predecessor into DestBB and -/// deleting the predecessor block. -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, - DomTreeUpdater *DTU) { - - // If BB has single-entry PHI nodes, fold them. - while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { - Value *NewVal = PN->getIncomingValue(0); - // Replace self referencing PHI with undef, it must be dead. - if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); - PN->replaceAllUsesWith(NewVal); - PN->eraseFromParent(); - } - - BasicBlock *PredBB = DestBB->getSinglePredecessor(); - assert(PredBB && "Block doesn't have a single predecessor!"); - - bool ReplaceEntryBB = false; - if (PredBB == &DestBB->getParent()->getEntryBlock()) - ReplaceEntryBB = true; - - // DTU updates: Collect all the edges that enter - // PredBB. These dominator edges will be redirected to DestBB. - SmallVector<DominatorTree::UpdateType, 32> Updates; - - if (DTU) { - Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); - for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { - Updates.push_back({DominatorTree::Delete, *I, PredBB}); - // This predecessor of PredBB may already have DestBB as a successor. - if (llvm::find(successors(*I), DestBB) == succ_end(*I)) - Updates.push_back({DominatorTree::Insert, *I, DestBB}); - } - } - - // Zap anything that took the address of DestBB. Not doing this will give the - // address an invalid value. - if (DestBB->hasAddressTaken()) { - BlockAddress *BA = BlockAddress::get(DestBB); - Constant *Replacement = - ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1); - BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, - BA->getType())); - BA->destroyConstant(); - } - - // Anything that branched to PredBB now branches to DestBB. - PredBB->replaceAllUsesWith(DestBB); - - // Splice all the instructions from PredBB to DestBB. - PredBB->getTerminator()->eraseFromParent(); - DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); - new UnreachableInst(PredBB->getContext(), PredBB); - - // If the PredBB is the entry block of the function, move DestBB up to - // become the entry block after we erase PredBB. - if (ReplaceEntryBB) - DestBB->moveAfter(PredBB); - - if (DTU) { - assert(PredBB->getInstList().size() == 1 && - isa<UnreachableInst>(PredBB->getTerminator()) && - "The successor list of PredBB isn't empty before " - "applying corresponding DTU updates."); - DTU->applyUpdatesPermissive(Updates); - DTU->deleteBB(PredBB); - // Recalculation of DomTree is needed when updating a forward DomTree and - // the Entry BB is replaced. - if (ReplaceEntryBB && DTU->hasDomTree()) { - // The entry block was removed and there is no external interface for - // the dominator tree to be notified of this change. In this corner-case - // we recalculate the entire tree. - DTU->recalculate(*(DestBB->getParent())); - } - } - - else { - PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr. - } -} - -/// CanMergeValues - Return true if we can choose one of these values to use -/// in place of the other. Note that we will always choose the non-undef -/// value to keep. 
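-/// (Editor's example, not in the original source: the pairs (%v, %v) and
-/// (undef, %v) are mergeable and keep %v; (%u, %v) with distinct non-undef
-/// values is not.)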
-static bool CanMergeValues(Value *First, Value *Second) {
-  return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second);
-}
-
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
-///
-/// Assumption: Succ is the single successor for BB.
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
-  assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
-
-  LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
-                    << Succ->getName() << "\n");
-  // Shortcut: if there is only a single predecessor, it must be BB and
-  // merging is always safe.
-  if (Succ->getSinglePredecessor()) return true;
-
-  // Make a list of the predecessors of BB.
-  SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
-  // Look at all the phi nodes in Succ, to see if they present a conflict when
-  // merging these blocks.
-  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
-    PHINode *PN = cast<PHINode>(I);
-
-    // If the incoming value from BB is again a PHINode in
-    // BB which has the same incoming value for *PI as PN does, we can
-    // merge the phi nodes and then the blocks can still be merged.
-    PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
-    if (BBPN && BBPN->getParent() == BB) {
-      for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
-        BasicBlock *IBB = PN->getIncomingBlock(PI);
-        if (BBPreds.count(IBB) &&
-            !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),
-                            PN->getIncomingValue(PI))) {
-          LLVM_DEBUG(dbgs()
-                     << "Can't fold, phi node " << PN->getName() << " in "
-                     << Succ->getName() << " is conflicting with "
-                     << BBPN->getName() << " with regard to common predecessor "
-                     << IBB->getName() << "\n");
-          return false;
-        }
-      }
-    } else {
-      Value* Val = PN->getIncomingValueForBlock(BB);
-      for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
-        // See if the incoming value for the common predecessor is equal to the
-        // one for BB, in which case this phi node will not prevent the merging
-        // of the block.
-        BasicBlock *IBB = PN->getIncomingBlock(PI);
-        if (BBPreds.count(IBB) &&
-            !CanMergeValues(Val, PN->getIncomingValue(PI))) {
-          LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName()
-                            << " in " << Succ->getName()
-                            << " is conflicting with regard to common "
-                            << "predecessor " << IBB->getName() << "\n");
-          return false;
-        }
-      }
-    }
-  }
-
-  return true;
-}
-
-using PredBlockVector = SmallVector<BasicBlock *, 16>;
-using IncomingValueMap = DenseMap<BasicBlock *, Value *>;
-
-/// Determines the value to use as the phi node input for a block.
-///
-/// Select between \p OldVal and any value that we know flows from \p BB
-/// to a particular phi on the basis of which one (if either) is not
-/// undef. Update IncomingValues based on the selected value.
-///
-/// \param OldVal The value we are considering selecting.
-/// \param BB The block that the value flows in from.
-/// \param IncomingValues A map from block-to-value for other phi inputs
-/// that we have examined.
-///
-/// \returns the selected value.
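-/// (Editor's example, not in the original source: if \p OldVal is undef but a
-/// previously examined phi recorded %v as flowing from \p BB, %v is returned
-/// so all phis see a consistent non-undef value for that predecessor.)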
-static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, - IncomingValueMap &IncomingValues) { - if (!isa<UndefValue>(OldVal)) { - assert((!IncomingValues.count(BB) || - IncomingValues.find(BB)->second == OldVal) && - "Expected OldVal to match incoming value from BB!"); - - IncomingValues.insert(std::make_pair(BB, OldVal)); - return OldVal; - } - - IncomingValueMap::const_iterator It = IncomingValues.find(BB); - if (It != IncomingValues.end()) return It->second; - - return OldVal; -} - -/// Create a map from block to value for the operands of a -/// given phi. -/// -/// Create a map from block to value for each non-undef value flowing -/// into \p PN. -/// -/// \param PN The phi we are collecting the map for. -/// \param IncomingValues [out] The map from block to value for this phi. -static void gatherIncomingValuesToPhi(PHINode *PN, - IncomingValueMap &IncomingValues) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *BB = PN->getIncomingBlock(i); - Value *V = PN->getIncomingValue(i); - - if (!isa<UndefValue>(V)) - IncomingValues.insert(std::make_pair(BB, V)); - } -} - -/// Replace the incoming undef values to a phi with the values -/// from a block-to-value map. -/// -/// \param PN The phi we are replacing the undefs in. -/// \param IncomingValues A map from block to value. -static void replaceUndefValuesInPhi(PHINode *PN, - const IncomingValueMap &IncomingValues) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - - if (!isa<UndefValue>(V)) continue; - - BasicBlock *BB = PN->getIncomingBlock(i); - IncomingValueMap::const_iterator It = IncomingValues.find(BB); - if (It == IncomingValues.end()) continue; - - PN->setIncomingValue(i, It->second); - } -} - -/// Replace a value flowing from a block to a phi with -/// potentially multiple instances of that value flowing from the -/// block's predecessors to the phi. -/// -/// \param BB The block with the value flowing into the phi. -/// \param BBPreds The predecessors of BB. -/// \param PN The phi that we are updating. -static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, - const PredBlockVector &BBPreds, - PHINode *PN) { - Value *OldVal = PN->removeIncomingValue(BB, false); - assert(OldVal && "No entry in PHI for Pred BB!"); - - IncomingValueMap IncomingValues; - - // We are merging two blocks - BB, and the block containing PN - and - // as a result we need to redirect edges from the predecessors of BB - // to go to the block containing PN, and update PN - // accordingly. Since we allow merging blocks in the case where the - // predecessor and successor blocks both share some predecessors, - // and where some of those common predecessors might have undef - // values flowing into PN, we want to rewrite those values to be - // consistent with the non-undef values. - - gatherIncomingValuesToPhi(PN, IncomingValues); - - // If this incoming value is one of the PHI nodes in BB, the new entries - // in the PHI node are the entries from the old PHI. - if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { - PHINode *OldValPN = cast<PHINode>(OldVal); - for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { - // Note that, since we are merging phi nodes and BB and Succ might - // have common predecessors, we could end up with a phi node with - // identical incoming branches. 
This will be cleaned up later (and - // will trigger asserts if we try to clean it up now, without also - // simplifying the corresponding conditional branch). - BasicBlock *PredBB = OldValPN->getIncomingBlock(i); - Value *PredVal = OldValPN->getIncomingValue(i); - Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, - IncomingValues); - - // And add a new incoming value for this predecessor for the - // newly retargeted branch. - PN->addIncoming(Selected, PredBB); - } - } else { - for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { - // Update existing incoming values in PN for this - // predecessor of BB. - BasicBlock *PredBB = BBPreds[i]; - Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, - IncomingValues); - - // And add a new incoming value for this predecessor for the - // newly retargeted branch. - PN->addIncoming(Selected, PredBB); - } - } - - replaceUndefValuesInPhi(PN, IncomingValues); -} - -/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an -/// unconditional branch, and contains no instructions other than PHI nodes, -/// potential side-effect free intrinsics and the branch. If possible, -/// eliminate BB by rewriting all the predecessors to branch to the successor -/// block and return true. If we can't transform, return false. -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - DomTreeUpdater *DTU) { - assert(BB != &BB->getParent()->getEntryBlock() && - "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); - - // We can't eliminate infinite loops. - BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); - if (BB == Succ) return false; - - // Check to see if merging these blocks would cause conflicts for any of the - // phi nodes in BB or Succ. If not, we can safely merge. - if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; - - // Check for cases where Succ has multiple predecessors and a PHI node in BB - // has uses which will not disappear when the PHI nodes are merged. It is - // possible to handle such cases, but difficult: it requires checking whether - // BB dominates Succ, which is non-trivial to calculate in the case where - // Succ has multiple predecessors. Also, it requires checking whether - // constructing the necessary self-referential PHI node doesn't introduce any - // conflicts; this isn't too difficult, but the previous code for doing this - // was incorrect. - // - // Note that if this check finds a live use, BB dominates Succ, so BB is - // something like a loop pre-header (or rarely, a part of an irreducible CFG); - // folding the branch isn't profitable in that case anyway. - if (!Succ->getSinglePredecessor()) { - BasicBlock::iterator BBI = BB->begin(); - while (isa<PHINode>(*BBI)) { - for (Use &U : BBI->uses()) { - if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) { - if (PN->getIncomingBlock(U) != BB) - return false; - } else { - return false; - } - } - ++BBI; - } - } - - // We cannot fold the block if it's a branch to an already present callbr - // successor because that creates duplicate successors. 
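-  // (Editor's note, not in the original source: if a predecessor's callbr
-  // already lists Succ as its default or one of its indirect destinations,
-  // retargeting that predecessor's BB edge to Succ would make Succ appear
-  // twice in the callbr's successor list.)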
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) { - if (Succ == CBI->getDefaultDest()) - return false; - for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) - if (Succ == CBI->getIndirectDest(i)) - return false; - } - } - - LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); - - SmallVector<DominatorTree::UpdateType, 32> Updates; - if (DTU) { - Updates.push_back({DominatorTree::Delete, BB, Succ}); - // All predecessors of BB will be moved to Succ. - for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - Updates.push_back({DominatorTree::Delete, *I, BB}); - // This predecessor of BB may already have Succ as a successor. - if (llvm::find(successors(*I), Succ) == succ_end(*I)) - Updates.push_back({DominatorTree::Insert, *I, Succ}); - } - } - - if (isa<PHINode>(Succ->begin())) { - // If there is more than one pred of succ, and there are PHI nodes in - // the successor, then we need to add incoming edges for the PHI nodes - // - const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); - - // Loop over all of the PHI nodes in the successor of BB. - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - - redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); - } - } - - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. - - // Copy over any phi, debug or lifetime instruction. - BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), - BB->getInstList()); - } else { - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. - assert(PN->use_empty() && "There shouldn't be any uses here!"); - PN->eraseFromParent(); - } - } - - // If the unconditional branch we replaced contains llvm.loop metadata, we - // add the metadata to the branch instructions in the predecessors. - unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop"); - Instruction *TI = BB->getTerminator(); - if (TI) - if (MDNode *LoopMD = TI->getMetadata(LoopMDKind)) - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *Pred = *PI; - Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD); - } - - // Everything that jumped to BB now goes to Succ. - BB->replaceAllUsesWith(Succ); - if (!Succ->hasName()) Succ->takeName(BB); - - // Clear the successor list of BB to match updates applying to DTU later. - if (BB->getTerminator()) - BB->getInstList().pop_back(); - new UnreachableInst(BB->getContext(), BB); - assert(succ_empty(BB) && "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); - - if (DTU) { - DTU->applyUpdatesPermissive(Updates); - DTU->deleteBB(BB); - } else { - BB->eraseFromParent(); // Delete the old basic block. - } - return true; -} - -/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI -/// nodes in this block. This doesn't try to be clever about PHI nodes -/// which differ only in the order of the incoming values, but instcombine -/// orders them so it usually won't matter. -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { - // This implementation doesn't currently consider undef operands - // specially. Theoretically, two phis which are identical except for - // one having an undef where the other doesn't could be collapsed. 
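-  // (Editor's illustration, not in the original source: in
-  //   %a = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
-  //   %b = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
-  // %b duplicates %a, so %b is replaced by %a and erased.)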
- - struct PHIDenseMapInfo { - static PHINode *getEmptyKey() { - return DenseMapInfo<PHINode *>::getEmptyKey(); - } - - static PHINode *getTombstoneKey() { - return DenseMapInfo<PHINode *>::getTombstoneKey(); - } - - static unsigned getHashValue(PHINode *PN) { - // Compute a hash value on the operands. Instcombine will likely have - // sorted them, which helps expose duplicates, but we have to check all - // the operands to be safe in case instcombine hasn't run. - return static_cast<unsigned>(hash_combine( - hash_combine_range(PN->value_op_begin(), PN->value_op_end()), - hash_combine_range(PN->block_begin(), PN->block_end()))); - } - - static bool isEqual(PHINode *LHS, PHINode *RHS) { - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) - return LHS == RHS; - return LHS->isIdenticalTo(RHS); - } - }; - - // Set of unique PHINodes. - DenseSet<PHINode *, PHIDenseMapInfo> PHISet; - - // Examine each PHI. - bool Changed = false; - for (auto I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I++);) { - auto Inserted = PHISet.insert(PN); - if (!Inserted.second) { - // A duplicate. Replace this PHI with its duplicate. - PN->replaceAllUsesWith(*Inserted.first); - PN->eraseFromParent(); - Changed = true; - - // The RAUW can change PHIs that we already visited. Start over from the - // beginning. - PHISet.clear(); - I = BB->begin(); - } - } - - return Changed; -} - -/// enforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, - const DataLayout &DL) { - assert(PrefAlign > Align); - - V = V->stripPointerCasts(); - - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // TODO: ideally, computeKnownBits ought to have used - // AllocaInst::getAlignment() in its computation already, making - // the below max redundant. But, as it turns out, - // stripPointerCasts recurses through infinite layers of bitcasts, - // while computeKnownBits is not allowed to traverse more than 6 - // levels. - Align = std::max(AI->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; - - // If the preferred alignment is greater than the natural stack alignment - // then don't round up. This avoids dynamic stack realignment. - if (DL.exceedsNaturalStackAlignment(PrefAlign)) - return Align; - AI->setAlignment(PrefAlign); - return PrefAlign; - } - - if (auto *GO = dyn_cast<GlobalObject>(V)) { - // TODO: as above, this shouldn't be necessary. - Align = std::max(GO->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; - - // If there is a large requested alignment and we can, bump up the alignment - // of the global. If the memory we set aside for the global may not be the - // memory used by the final program then it is impossible for us to reliably - // enforce the preferred alignment. 
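-  // (Editor's note, not in the original source: e.g. a global that is merely
-  // declared here, has weak linkage, or carries an explicit section may get
-  // its storage laid out elsewhere, so its alignment cannot be raised
-  // unilaterally.)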
-  if (!GO->canIncreaseAlignment())
-    return Align;
-
-  GO->setAlignment(PrefAlign);
-  return PrefAlign;
-  }
-
-  return Align;
-}
-
-unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
-                                          const DataLayout &DL,
-                                          const Instruction *CxtI,
-                                          AssumptionCache *AC,
-                                          const DominatorTree *DT) {
-  assert(V->getType()->isPointerTy() &&
-         "getOrEnforceKnownAlignment expects a pointer!");
-
-  KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT);
-  unsigned TrailZ = Known.countMinTrailingZeros();
-
-  // Avoid trouble with ridiculously large TrailZ values, such as
-  // those computed from a null pointer.
-  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
-  unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ);
-
-  // LLVM doesn't support alignments larger than this currently.
-  Align = std::min(Align, +Value::MaximumAlignment);
-
-  if (PrefAlign > Align)
-    Align = enforceKnownAlignment(V, Align, PrefAlign, DL);
-
-  // We don't need to make any adjustment.
-  return Align;
-}
-
-//===----------------------------------------------------------------------===//
-//  Dbg Intrinsic utilities
-//
-
-/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
-                              Instruction *I) {
-  // Since we can't guarantee that the original dbg.declare intrinsic
-  // is removed by LowerDbgDeclare(), we need to make sure that we are
-  // not inserting the same dbg.value intrinsic over and over.
-  BasicBlock::InstListType::iterator PrevI(I);
-  if (PrevI != I->getParent()->getInstList().begin()) {
-    --PrevI;
-    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
-      if (DVI->getValue() == I->getOperand(0) &&
-          DVI->getVariable() == DIVar &&
-          DVI->getExpression() == DIExpr)
-        return true;
-  }
-  return false;
-}
-
-/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar,
-                             DIExpression *DIExpr,
-                             PHINode *APN) {
-  // Since we can't guarantee that the original dbg.declare intrinsic
-  // is removed by LowerDbgDeclare(), we need to make sure that we are
-  // not inserting the same dbg.value intrinsic over and over.
-  SmallVector<DbgValueInst *, 1> DbgValues;
-  findDbgValues(DbgValues, APN);
-  for (auto *DVI : DbgValues) {
-    assert(DVI->getValue() == APN);
-    if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
-      return true;
-  }
-  return false;
-}
-
-/// Check if the alloc size of \p ValTy is large enough to cover the variable
-/// (or fragment of the variable) described by \p DII.
-///
-/// This is primarily intended as a helper for the different
-/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is
-/// converted describes an alloca'd variable, so we need to use the
-/// alloc size of the value when doing the comparison. E.g. an i1 value will be
-/// identified as covering an n-bit fragment, if the store size of i1 is at
-/// least n bits.
-static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
-  const DataLayout &DL = DII->getModule()->getDataLayout();
-  uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
-  if (auto FragmentSize = DII->getFragmentSizeInBits())
-    return ValueSize >= *FragmentSize;
-  // We can't always calculate the size of the DI variable (e.g. if it is a
-  // VLA). Try to use the size of the alloca that the dbg intrinsic describes
-  // instead.
-  if (DII->isAddressOfVariable())
-    if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
-      if (auto FragmentSize = AI->getAllocationSizeInBits(DL))
-        return ValueSize >= *FragmentSize;
-  // Could not determine size of variable. Conservatively return false.
-  return false;
-}
-
-/// Produce a DebugLoc to use for each dbg.declare/inst pair that is promoted
-/// to a dbg.value. Because no machine insts can come from debug intrinsics,
-/// only the scope and inlinedAt are significant. Zero line numbers are used in
-/// case this DebugLoc leaks into any adjacent instructions.
-static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
-  // Original dbg.declare must have a location.
-  DebugLoc DeclareLoc = DII->getDebugLoc();
-  MDNode *Scope = DeclareLoc.getScope();
-  DILocation *InlinedAt = DeclareLoc.getInlinedAt();
-  // Produce an unknown location with the correct scope / inlinedAt fields.
-  return DebugLoc::get(0, 0, Scope, InlinedAt);
-}
-
-/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
-                                           StoreInst *SI, DIBuilder &Builder) {
-  assert(DII->isAddressOfVariable());
-  auto *DIVar = DII->getVariable();
-  assert(DIVar && "Missing variable");
-  auto *DIExpr = DII->getExpression();
-  Value *DV = SI->getValueOperand();
-
-  DebugLoc NewLoc = getDebugValueLoc(DII, SI);
-
-  if (!valueCoversEntireFragment(DV->getType(), DII)) {
-    // FIXME: If storing to a part of the variable described by the dbg.declare,
-    // then we want to insert a dbg.value for the corresponding fragment.
-    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
-                      << *DII << '\n');
-    // For now, when there is a store to parts of the variable (but we do not
-    // know which part) we insert a dbg.value intrinsic to indicate that we
-    // know nothing about the variable's content.
-    DV = UndefValue::get(DV->getType());
-    if (!LdStHasDebugValue(DIVar, DIExpr, SI))
-      Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
-    return;
-  }
-
-  if (!LdStHasDebugValue(DIVar, DIExpr, SI))
-    Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
-}
-
-/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
-                                           LoadInst *LI, DIBuilder &Builder) {
-  auto *DIVar = DII->getVariable();
-  auto *DIExpr = DII->getExpression();
-  assert(DIVar && "Missing variable");
-
-  if (LdStHasDebugValue(DIVar, DIExpr, LI))
-    return;
-
-  if (!valueCoversEntireFragment(LI->getType(), DII)) {
-    // FIXME: If only referring to a part of the variable described by the
-    // dbg.declare, then we want to insert a dbg.value for the corresponding
-    // fragment.
-    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
-                      << *DII << '\n');
-    return;
-  }
-
-  DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
-
-  // We are now tracking the loaded value instead of the address. In the
-  // future if multi-location support is added to the IR, it might be
-  // preferable to keep tracking both the loaded value and the original
-  // address in case the alloca cannot be elided.
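-  // (Editor's illustration, not in the original source: for
-  //   %0 = load i32, i32* %x.addr
-  // where %x.addr carries a dbg.declare for variable "x", this emits
-  //   call void @llvm.dbg.value(metadata i32 %0, ...)
-  // immediately after the load.)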
- Instruction *DbgValue = Builder.insertDbgValueIntrinsic( - LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr); - DbgValue->insertAfter(LI); -} - -/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated -/// llvm.dbg.declare or llvm.dbg.addr intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, - PHINode *APN, DIBuilder &Builder) { - auto *DIVar = DII->getVariable(); - auto *DIExpr = DII->getExpression(); - assert(DIVar && "Missing variable"); - - if (PhiHasDebugValue(DIVar, DIExpr, APN)) - return; - - if (!valueCoversEntireFragment(APN->getType(), DII)) { - // FIXME: If only referring to a part of the variable described by the - // dbg.declare, then we want to insert a dbg.value for the corresponding - // fragment. - LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " - << *DII << '\n'); - return; - } - - BasicBlock *BB = APN->getParent(); - auto InsertionPt = BB->getFirstInsertionPt(); - - DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); - - // The block may be a catchswitch block, which does not have a valid - // insertion point. - // FIXME: Insert dbg.value markers in the successors when appropriate. - if (InsertionPt != BB->end()) - Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt); -} - -/// Determine whether this alloca is either a VLA or an array. -static bool isArray(AllocaInst *AI) { - return AI->isArrayAllocation() || - AI->getType()->getElementType()->isArrayTy(); -} - -/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set -/// of llvm.dbg.value intrinsics. -bool llvm::LowerDbgDeclare(Function &F) { - DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); - SmallVector<DbgDeclareInst *, 4> Dbgs; - for (auto &FI : F) - for (Instruction &BI : FI) - if (auto DDI = dyn_cast<DbgDeclareInst>(&BI)) - Dbgs.push_back(DDI); - - if (Dbgs.empty()) - return false; - - for (auto &I : Dbgs) { - DbgDeclareInst *DDI = I; - AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); - // If this is an alloca for a scalar variable, insert a dbg.value - // at each load and store to the alloca and erase the dbg.declare. - // The dbg.values allow tracking a variable even if it is not - // stored on the stack, while the dbg.declare can only describe - // the stack slot (and at a lexical-scope granularity). Later - // passes will attempt to elide the stack slot. - if (!AI || isArray(AI)) - continue; - - // A volatile load/store means that the alloca can't be elided anyway. - if (llvm::any_of(AI->users(), [](User *U) -> bool { - if (LoadInst *LI = dyn_cast<LoadInst>(U)) - return LI->isVolatile(); - if (StoreInst *SI = dyn_cast<StoreInst>(U)) - return SI->isVolatile(); - return false; - })) - continue; - - for (auto &AIUse : AI->uses()) { - User *U = AIUse.getUser(); - if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (AIUse.getOperandNo() == 1) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - } else if (CallInst *CI = dyn_cast<CallInst>(U)) { - // This is a call by-value or some other instruction that takes a - // pointer to the variable. Insert a *value* intrinsic that describes - // the variable by dereferencing the alloca. 
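-        // (Editor's illustration, not in the original source: e.g. for
-        //   call void @escape(i32* %x.addr)
-        // this emits a dbg.value of %x.addr with DW_OP_deref appended, i.e.
-        // "the variable lives at the address %x.addr points to".)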
- DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr); - auto *DerefExpr = - DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); - DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc, - CI); - } - } - DDI->eraseFromParent(); - } - return true; -} - -/// Propagate dbg.value intrinsics through the newly inserted PHIs. -void llvm::insertDebugValuesForPHIs(BasicBlock *BB, - SmallVectorImpl<PHINode *> &InsertedPHIs) { - assert(BB && "No BasicBlock to clone dbg.value(s) from."); - if (InsertedPHIs.size() == 0) - return; - - // Map existing PHI nodes to their dbg.values. - ValueToValueMapTy DbgValueMap; - for (auto &I : *BB) { - if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) { - if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation())) - DbgValueMap.insert({Loc, DbgII}); - } - } - if (DbgValueMap.size() == 0) - return; - - // Then iterate through the new PHIs and look to see if they use one of the - // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will - // propagate the info through the new PHI. - LLVMContext &C = BB->getContext(); - for (auto PHI : InsertedPHIs) { - BasicBlock *Parent = PHI->getParent(); - // Avoid inserting an intrinsic into an EH block. - if (Parent->getFirstNonPHI()->isEHPad()) - continue; - auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI)); - for (auto VI : PHI->operand_values()) { - auto V = DbgValueMap.find(VI); - if (V != DbgValueMap.end()) { - auto *DbgII = cast<DbgVariableIntrinsic>(V->second); - Instruction *NewDbgII = DbgII->clone(); - NewDbgII->setOperand(0, PhiMAV); - auto InsertionPt = Parent->getFirstInsertionPt(); - assert(InsertionPt != Parent->end() && "Ill-formed basic block"); - NewDbgII->insertBefore(&*InsertionPt); - } - } - } -} - -/// Finds all intrinsics declaring local variables as living in the memory that -/// 'V' points to. This may include a mix of dbg.declare and -/// dbg.addr intrinsics. -TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup. - if (!V->isUsedByMetadata()) - return {}; - auto *L = LocalAsMetadata::getIfExists(V); - if (!L) - return {}; - auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L); - if (!MDV) - return {}; - - TinyPtrVector<DbgVariableIntrinsic *> Declares; - for (User *U : MDV->users()) { - if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U)) - if (DII->isAddressOfVariable()) - Declares.push_back(DII); - } - - return Declares; -} - -void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup. - if (!V->isUsedByMetadata()) - return; - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U)) - DbgValues.push_back(DVI); -} - -void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers, - Value *V) { - // This function is hot. Check whether the value has any metadata to avoid a - // DenseMap lookup. 
- if (!V->isUsedByMetadata()) - return; - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U)) - DbgUsers.push_back(DII); -} - -bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, - Instruction *InsertBefore, DIBuilder &Builder, - uint8_t DIExprFlags, int Offset) { - auto DbgAddrs = FindDbgAddrUses(Address); - for (DbgVariableIntrinsic *DII : DbgAddrs) { - DebugLoc Loc = DII->getDebugLoc(); - auto *DIVar = DII->getVariable(); - auto *DIExpr = DII->getExpression(); - assert(DIVar && "Missing variable"); - DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset); - // Insert llvm.dbg.declare immediately before InsertBefore, and remove old - // llvm.dbg.declare. - Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); - if (DII == InsertBefore) - InsertBefore = InsertBefore->getNextNode(); - DII->eraseFromParent(); - } - return !DbgAddrs.empty(); -} - -bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, uint8_t DIExprFlags, - int Offset) { - return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, - DIExprFlags, Offset); -} - -static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, - DIBuilder &Builder, int Offset) { - DebugLoc Loc = DVI->getDebugLoc(); - auto *DIVar = DVI->getVariable(); - auto *DIExpr = DVI->getExpression(); - assert(DIVar && "Missing variable"); - - // This is an alloca-based llvm.dbg.value. The first thing it should do with - // the alloca pointer is dereference it. Otherwise we don't know how to handle - // it and give up. - if (!DIExpr || DIExpr->getNumElements() < 1 || - DIExpr->getElement(0) != dwarf::DW_OP_deref) - return; - - // Insert the offset immediately after the first deref. - // We could just change the offset argument of dbg.value, but it's unsigned... - if (Offset) { - SmallVector<uint64_t, 4> Ops; - Ops.push_back(dwarf::DW_OP_deref); - DIExpression::appendOffset(Ops, Offset); - Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); - DIExpr = Builder.createExpression(Ops); - } - - Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); - DVI->eraseFromParent(); -} - -void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, int Offset) { - if (auto *L = LocalAsMetadata::getIfExists(AI)) - if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) - for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) { - Use &U = *UI++; - if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser())) - replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset); - } -} - -/// Wrap \p V in a ValueAsMetadata instance. 
-static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) { - return MetadataAsValue::get(C, ValueAsMetadata::get(V)); -} - -bool llvm::salvageDebugInfo(Instruction &I) { - SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, &I); - if (DbgUsers.empty()) - return false; - - return salvageDebugInfoForDbgValues(I, DbgUsers); -} - -bool llvm::salvageDebugInfoForDbgValues( - Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) { - auto &Ctx = I.getContext(); - auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); }; - - for (auto *DII : DbgUsers) { - // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they - // are implicitly pointing out the value as a DWARF memory location - // description. - bool StackValue = isa<DbgValueInst>(DII); - - DIExpression *DIExpr = - salvageDebugInfoImpl(I, DII->getExpression(), StackValue); - - // salvageDebugInfoImpl should fail on examining the first element of - // DbgUsers, or none of them. - if (!DIExpr) - return false; - - DII->setOperand(0, wrapMD(I.getOperand(0))); - DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); - LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); - } - - return true; -} - -DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, - DIExpression *SrcDIExpr, - bool WithStackValue) { - auto &M = *I.getModule(); - auto &DL = M.getDataLayout(); - - // Apply a vector of opcodes to the source DIExpression. - auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * { - DIExpression *DIExpr = SrcDIExpr; - if (!Ops.empty()) { - DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); - } - return DIExpr; - }; - - // Apply the given offset to the source DIExpression. - auto applyOffset = [&](uint64_t Offset) -> DIExpression * { - SmallVector<uint64_t, 8> Ops; - DIExpression::appendOffset(Ops, Offset); - return doSalvage(Ops); - }; - - // initializer-list helper for applying operators to the source DIExpression. - auto applyOps = - [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * { - SmallVector<uint64_t, 8> Ops(Opcodes); - return doSalvage(Ops); - }; - - if (auto *CI = dyn_cast<CastInst>(&I)) { - // No-op casts and zexts are irrelevant for debug info. - if (CI->isNoopCast(DL) || isa<ZExtInst>(&I)) - return SrcDIExpr; - return nullptr; - } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { - unsigned BitWidth = - M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); - // Rewrite a constant GEP into a DIExpression. - APInt Offset(BitWidth, 0); - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { - return applyOffset(Offset.getSExtValue()); - } else { - return nullptr; - } - } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { - // Rewrite binary operations with constant integer operands. 
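-    // (Editor's illustration, not in the original source: e.g.
-    //   %y = add i64 %x, 16
-    // is salvaged by describing %y as %x with DW_OP_plus_uconst 16 prepended
-    // to the expression, via applyOffset(16) below.)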
- auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)); - if (!ConstInt || ConstInt->getBitWidth() > 64) - return nullptr; - - uint64_t Val = ConstInt->getSExtValue(); - switch (BI->getOpcode()) { - case Instruction::Add: - return applyOffset(Val); - case Instruction::Sub: - return applyOffset(-int64_t(Val)); - case Instruction::Mul: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); - case Instruction::SDiv: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); - case Instruction::SRem: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); - case Instruction::Or: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); - case Instruction::And: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); - case Instruction::Xor: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); - case Instruction::Shl: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); - case Instruction::LShr: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr}); - case Instruction::AShr: - return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); - default: - // TODO: Salvage constants from each kind of binop we know about. - return nullptr; - } - // *Not* to do: we should not attempt to salvage load instructions, - // because the validity and lifetime of a dbg.value containing - // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. - } - return nullptr; -} - -/// A replacement for a dbg.value expression. -using DbgValReplacement = Optional<DIExpression *>; - -/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr, -/// possibly moving/deleting users to prevent use-before-def. Returns true if -/// changes are made. -static bool rewriteDebugUsers( - Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT, - function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) { - // Find debug users of From. - SmallVector<DbgVariableIntrinsic *, 1> Users; - findDbgUsers(Users, &From); - if (Users.empty()) - return false; - - // Prevent use-before-def of To. - bool Changed = false; - SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage; - if (isa<Instruction>(&To)) { - bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint; - - for (auto *DII : Users) { - // It's common to see a debug user between From and DomPoint. Move it - // after DomPoint to preserve the variable update without any reordering. - if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) { - LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n'); - DII->moveAfter(&DomPoint); - Changed = true; - - // Users which otherwise aren't dominated by the replacement value must - // be salvaged or deleted. - } else if (!DT.dominates(&DomPoint, DII)) { - DeleteOrSalvage.insert(DII); - } - } - } - - // Update debug users without use-before-def risk. - for (auto *DII : Users) { - if (DeleteOrSalvage.count(DII)) - continue; - - LLVMContext &Ctx = DII->getContext(); - DbgValReplacement DVR = RewriteExpr(*DII); - if (!DVR) - continue; - - DII->setOperand(0, wrapValueInMetadata(Ctx, &To)); - DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR)); - LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n'); - Changed = true; - } - - if (!DeleteOrSalvage.empty()) { - // Try to salvage the remaining debug users. - Changed |= salvageDebugInfo(From); - - // Delete the debug users which weren't salvaged. 
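// A sketch of the constant-RHS salvage pattern implemented by the switch
// above, reduced to a table: a binop with constant operand C becomes
// {DW_OP_constu, C, <dwarf op>} appended to the variable's expression.
// BinOp is a hypothetical stand-in for the IR opcode; the opcode values are
// the DWARF constants. (The code above routes Add/Sub through the offset
// helper instead, which is semantically equivalent.)
#include <cstdint>
#include <vector>

enum class BinOp { Add, Mul, And, Shl };

static std::vector<uint64_t> salvageBinOpSketch(BinOp Op, uint64_t C) {
  const uint64_t DW_OP_constu = 0x10, DW_OP_mul = 0x1e, DW_OP_and = 0x1a,
                 DW_OP_plus = 0x22, DW_OP_shl = 0x24;
  switch (Op) {
  case BinOp::Add: return {DW_OP_constu, C, DW_OP_plus};
  case BinOp::Mul: return {DW_OP_constu, C, DW_OP_mul};
  case BinOp::And: return {DW_OP_constu, C, DW_OP_and};
  case BinOp::Shl: return {DW_OP_constu, C, DW_OP_shl};
  }
  return {};
}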
- for (auto *DII : DeleteOrSalvage) { - if (DII->getVariableLocation() == &From) { - LLVM_DEBUG(dbgs() << "Erased UseBeforeDef: " << *DII << '\n'); - DII->eraseFromParent(); - Changed = true; - } - } - } - - return Changed; -} - -/// Check if a bitcast from a value of type \p FromTy to type \p ToTy would -/// losslessly preserve the bits and semantics of the value. This predicate is -/// symmetric, i.e. swapping \p FromTy and \p ToTy should give the same result. -/// -/// Note that Type::canLosslesslyBitCastTo is not suitable here because it -/// allows semantically inequivalent bitcasts, such as <2 x i64> -> <4 x i32>, -/// and also does not allow lossless pointer <-> integer conversions. -static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy, - Type *ToTy) { - // Trivially compatible types. - if (FromTy == ToTy) - return true; - - // Handle compatible pointer <-> integer conversions. - if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) { - bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy); - bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) && - !DL.isNonIntegralPointerType(ToTy); - return SameSize && LosslessConversion; - } - - // TODO: This is not exhaustive. - return false; -} - -bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, - Instruction &DomPoint, DominatorTree &DT) { - // Exit early if From has no debug users. - if (!From.isUsedByMetadata()) - return false; - - assert(&From != &To && "Can't replace something with itself"); - - Type *FromTy = From.getType(); - Type *ToTy = To.getType(); - - auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement { - return DII.getExpression(); - }; - - // Handle no-op conversions. - Module &M = *From.getModule(); - const DataLayout &DL = M.getDataLayout(); - if (isBitCastSemanticsPreserving(DL, FromTy, ToTy)) - return rewriteDebugUsers(From, To, DomPoint, DT, Identity); - - // Handle integer-to-integer widening and narrowing. - // FIXME: Use DW_OP_convert when it's available everywhere. - if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) { - uint64_t FromBits = FromTy->getPrimitiveSizeInBits(); - uint64_t ToBits = ToTy->getPrimitiveSizeInBits(); - assert(FromBits != ToBits && "Unexpected no-op conversion"); - - // When the width of the result grows, assume that a debugger will only - // access the low `FromBits` bits when inspecting the source variable. - if (FromBits < ToBits) - return rewriteDebugUsers(From, To, DomPoint, DT, Identity); - - // The width of the result has shrunk. Use sign/zero extension to describe - // the source variable's high bits. - auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement { - DILocalVariable *Var = DII.getVariable(); - - // Without knowing signedness, sign/zero extension isn't possible. - auto Signedness = Var->getSignedness(); - if (!Signedness) - return None; - - bool Signed = *Signedness == DIBasicType::Signedness::Signed; - dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned; - SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK, - dwarf::DW_OP_LLVM_convert, FromBits, TK}); - return DIExpression::appendToStack(DII.getExpression(), Ops); - }; - return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); - } - - // TODO: Floating-point conversions, vectors.
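// A toy model of the integer/pointer arm of the lossless-bitcast predicate
// above (the FromTy == ToTy fast path and DataLayout queries are elided).
// TypeDesc is a hypothetical stand-in for (llvm::Type, DataLayout).
#include <cstdint>

struct TypeDesc {
  bool IsIntOrPtr;
  bool IsNonIntegralPtr; // pointer without a stable integer representation
  uint64_t Bits;
};

static bool losslessBitCastSketch(const TypeDesc &From, const TypeDesc &To) {
  // Same-size integer/pointer pairs convert losslessly unless either side is
  // a non-integral pointer; everything else (vectors, floats) is rejected
  // conservatively, mirroring the TODO above.
  return From.IsIntOrPtr && To.IsIntOrPtr && From.Bits == To.Bits &&
         !From.IsNonIntegralPtr && !To.IsNonIntegralPtr;
}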
- return false; -} - -unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { - unsigned NumDeadInst = 0; - // Delete the instructions backwards, as it has a reduced likelihood of - // having to update as many def-use and use-def chains. - Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. - while (EndInst != &BB->front()) { - // Delete the next to last instruction. - Instruction *Inst = &*--EndInst->getIterator(); - if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { - EndInst = Inst; - continue; - } - if (!isa<DbgInfoIntrinsic>(Inst)) - ++NumDeadInst; - Inst->eraseFromParent(); - } - return NumDeadInst; -} - -unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA, DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU) { - BasicBlock *BB = I->getParent(); - std::vector <DominatorTree::UpdateType> Updates; - - if (MSSAU) - MSSAU->changeToUnreachable(I); - - // Loop over all of the successors, removing BB's entry from any PHI - // nodes. - if (DTU) - Updates.reserve(BB->getTerminator()->getNumSuccessors()); - for (BasicBlock *Successor : successors(BB)) { - Successor->removePredecessor(BB, PreserveLCSSA); - if (DTU) - Updates.push_back({DominatorTree::Delete, BB, Successor}); - } - // Insert a call to llvm.trap right before this. This turns the undefined - // behavior into a hard fail instead of falling through into random code. - if (UseLLVMTrap) { - Function *TrapFn = - Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst *CallTrap = CallInst::Create(TrapFn, "", I); - CallTrap->setDebugLoc(I->getDebugLoc()); - } - auto *UI = new UnreachableInst(I->getContext(), I); - UI->setDebugLoc(I->getDebugLoc()); - - // All instructions after this are dead. - unsigned NumInstrsRemoved = 0; - BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); - while (BBI != BBE) { - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); - BB->getInstList().erase(BBI++); - ++NumInstrsRemoved; - } - if (DTU) - DTU->applyUpdatesPermissive(Updates); - return NumInstrsRemoved; -} - -/// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) { - SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); - SmallVector<OperandBundleDef, 1> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - CallInst *NewCall = CallInst::Create( - II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - NewCall->copyMetadata(*II); - II->replaceAllUsesWith(NewCall); - - // Follow the call by a branch to the normal destination. - BasicBlock *NormalDestBB = II->getNormalDest(); - BranchInst::Create(NormalDestBB, II); - - // Update PHI nodes in the unwind destination - BasicBlock *BB = II->getParent(); - BasicBlock *UnwindDestBB = II->getUnwindDest(); - UnwindDestBB->removePredecessor(BB); - II->eraseFromParent(); - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}}); -} - -BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, - BasicBlock *UnwindEdge) { - BasicBlock *BB = CI->getParent(); - - // Convert this function call into an invoke instruction. First, split the - // basic block. 
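// A sketch of the dead-tail removal in changeToUnreachable, using a
// std::list<int> as a stand-in for the block's instruction list: everything
// from the faulting position to the end of the block is erased and counted.
#include <list>

static unsigned eraseToEndSketch(std::list<int> &Block,
                                 std::list<int>::iterator I) {
  unsigned NumRemoved = 0;
  while (I != Block.end()) {
    I = Block.erase(I); // erase returns the next position, like IR iterators
    ++NumRemoved;
  }
  return NumRemoved;
}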
- BasicBlock *Split = - BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); - - // Delete the unconditional branch inserted by splitBasicBlock - BB->getInstList().pop_back(); - - // Create the new invoke instruction. - SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end()); - SmallVector<OperandBundleDef, 1> OpBundles; - - CI->getOperandBundlesAsDefs(OpBundles); - - // Note: we're round tripping operand bundles through memory here, and that - // can potentially be avoided with a cleverer API design that we do not have - // as of this time. - - InvokeInst *II = - InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split, - UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB); - II->setDebugLoc(CI->getDebugLoc()); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - - // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakTrackingVH. - CI->replaceAllUsesWith(II); - - // Delete the original call - Split->getInstList().pop_front(); - return Split; -} - -static bool markAliveBlocks(Function &F, - SmallPtrSetImpl<BasicBlock *> &Reachable, - DomTreeUpdater *DTU = nullptr) { - SmallVector<BasicBlock*, 128> Worklist; - BasicBlock *BB = &F.front(); - Worklist.push_back(BB); - Reachable.insert(BB); - bool Changed = false; - do { - BB = Worklist.pop_back_val(); - - // Do a quick scan of the basic block, turning any obviously unreachable - // instructions into LLVM unreachable insts. The instruction combining pass - // canonicalizes unreachable insts into stores to null or undef. - for (Instruction &I : *BB) { - if (auto *CI = dyn_cast<CallInst>(&I)) { - Value *Callee = CI->getCalledValue(); - // Handle intrinsic calls. - if (Function *F = dyn_cast<Function>(Callee)) { - auto IntrinsicID = F->getIntrinsicID(); - // Assumptions that are known to be false are equivalent to - // unreachable. Also, if the condition is undefined, then we make the - // choice most beneficial to the optimizer, and choose that to also be - // unreachable. - if (IntrinsicID == Intrinsic::assume) { - if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI, false, false, DTU); - Changed = true; - break; - } - } else if (IntrinsicID == Intrinsic::experimental_guard) { - // A call to the guard intrinsic bails out of the current - // compilation unit if the predicate passed to it is false. If the - // predicate is a constant false, then we know the guard will bail - // out of the current compile unconditionally, so all code following - // it is dead. - // - // Note: unlike in llvm.assume, it is not "obviously profitable" for - // guards to treat `undef` as `false` since a guard on `undef` can - // still be useful for widening. - if (match(CI->getArgOperand(0), m_Zero())) - if (!isa<UnreachableInst>(CI->getNextNode())) { - changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false, - false, DTU); - Changed = true; - break; - } - } - } else if ((isa<ConstantPointerNull>(Callee) && - !NullPointerIsDefined(CI->getFunction())) || - isa<UndefValue>(Callee)) { - changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU); - Changed = true; - break; - } - if (CI->doesNotReturn() && !CI->isMustTailCall()) { - // If we found a call to a no-return function, insert an unreachable - // instruction after it. Make sure there isn't *already* one there - // though. 
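// A sketch of the worklist walk that drives markAliveBlocks, over a plain
// adjacency list (indices stand in for BasicBlocks). Blocks never inserted
// into Reachable are dead; the real function also rewrites obviously
// unreachable instructions as it visits each block.
#include <cstddef>
#include <unordered_set>
#include <vector>

static std::unordered_set<std::size_t>
findReachableSketch(const std::vector<std::vector<std::size_t>> &Succs,
                    std::size_t Entry) {
  std::unordered_set<std::size_t> Reachable{Entry};
  std::vector<std::size_t> Worklist{Entry};
  while (!Worklist.empty()) {
    std::size_t BB = Worklist.back();
    Worklist.pop_back();
    for (std::size_t S : Succs[BB])
      if (Reachable.insert(S).second) // first time we see this successor
        Worklist.push_back(S);
  }
  return Reachable;
}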
- if (!isa<UnreachableInst>(CI->getNextNode())) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI->getNextNode(), false, false, DTU); - Changed = true; - } - break; - } - } else if (auto *SI = dyn_cast<StoreInst>(&I)) { - // Store to undef and store to null are undefined and used to signal - // that they should be changed to unreachable by passes that can't - // modify the CFG. - - // Don't touch volatile stores. - if (SI->isVolatile()) continue; - - Value *Ptr = SI->getOperand(1); - - if (isa<UndefValue>(Ptr) || - (isa<ConstantPointerNull>(Ptr) && - !NullPointerIsDefined(SI->getFunction(), - SI->getPointerAddressSpace()))) { - changeToUnreachable(SI, true, false, DTU); - Changed = true; - break; - } - } - } - - Instruction *Terminator = BB->getTerminator(); - if (auto *II = dyn_cast<InvokeInst>(Terminator)) { - // Turn invokes that call 'nounwind' functions into ordinary calls. - Value *Callee = II->getCalledValue(); - if ((isa<ConstantPointerNull>(Callee) && - !NullPointerIsDefined(BB->getParent())) || - isa<UndefValue>(Callee)) { - changeToUnreachable(II, true, false, DTU); - Changed = true; - } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { - if (II->use_empty() && II->onlyReadsMemory()) { - // jump to the normal destination branch. - BasicBlock *NormalDestBB = II->getNormalDest(); - BasicBlock *UnwindDestBB = II->getUnwindDest(); - BranchInst::Create(NormalDestBB, II); - UnwindDestBB->removePredecessor(II->getParent()); - II->eraseFromParent(); - if (DTU) - DTU->applyUpdatesPermissive( - {{DominatorTree::Delete, BB, UnwindDestBB}}); - } else - changeToCall(II, DTU); - Changed = true; - } - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { - // Remove catchpads which cannot be reached. - struct CatchPadDenseMapInfo { - static CatchPadInst *getEmptyKey() { - return DenseMapInfo<CatchPadInst *>::getEmptyKey(); - } - - static CatchPadInst *getTombstoneKey() { - return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); - } - - static unsigned getHashValue(CatchPadInst *CatchPad) { - return static_cast<unsigned>(hash_combine_range( - CatchPad->value_op_begin(), CatchPad->value_op_end())); - } - - static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { - if (LHS == getEmptyKey() || LHS == getTombstoneKey() || - RHS == getEmptyKey() || RHS == getTombstoneKey()) - return LHS == RHS; - return LHS->isIdenticalTo(RHS); - } - }; - - // Set of unique CatchPads. 
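// The duplicate-handler removal below keys a set on the catchpad's operands
// (structural identity, as isIdenticalTo does), so only the first occurrence
// of each equivalent handler survives. A toy version, with vectors of ints
// standing in for catchpad operand lists:
#include <set>
#include <vector>

static std::vector<std::vector<int>>
dedupHandlersSketch(const std::vector<std::vector<int>> &Handlers) {
  std::set<std::vector<int>> Seen; // contents compare structurally
  std::vector<std::vector<int>> Kept;
  for (const auto &H : Handlers)
    if (Seen.insert(H).second)
      Kept.push_back(H);
  return Kept;
}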
- SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, - CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> - HandlerSet; - detail::DenseSetEmpty Empty; - for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(), - E = CatchSwitch->handler_end(); - I != E; ++I) { - BasicBlock *HandlerBB = *I; - auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); - if (!HandlerSet.insert({CatchPad, Empty}).second) { - CatchSwitch->removeHandler(I); - --I; - --E; - Changed = true; - } - } - } - - Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU); - for (BasicBlock *Successor : successors(BB)) - if (Reachable.insert(Successor).second) - Worklist.push_back(Successor); - } while (!Worklist.empty()); - return Changed; -} - -void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { - Instruction *TI = BB->getTerminator(); - - if (auto *II = dyn_cast<InvokeInst>(TI)) { - changeToCall(II, DTU); - return; - } - - Instruction *NewTI; - BasicBlock *UnwindDest; - - if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { - NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); - UnwindDest = CRI->getUnwindDest(); - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { - auto *NewCatchSwitch = CatchSwitchInst::Create( - CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), - CatchSwitch->getName(), CatchSwitch); - for (BasicBlock *PadBB : CatchSwitch->handlers()) - NewCatchSwitch->addHandler(PadBB); - - NewTI = NewCatchSwitch; - UnwindDest = CatchSwitch->getUnwindDest(); - } else { - llvm_unreachable("Could not find unwind successor"); - } - - NewTI->takeName(TI); - NewTI->setDebugLoc(TI->getDebugLoc()); - UnwindDest->removePredecessor(BB); - TI->replaceAllUsesWith(NewTI); - TI->eraseFromParent(); - if (DTU) - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}}); -} - -/// removeUnreachableBlocks - Remove blocks that are not reachable, even -/// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo -/// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, - DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU) { - SmallPtrSet<BasicBlock*, 16> Reachable; - bool Changed = markAliveBlocks(F, Reachable, DTU); - - // If there are unreachable blocks in the CFG... - if (Reachable.size() == F.size()) - return Changed; - - assert(Reachable.size() < F.size()); - NumRemoved += F.size()-Reachable.size(); - - SmallSetVector<BasicBlock *, 8> DeadBlockSet; - for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { - auto *BB = &*I; - if (Reachable.count(BB)) - continue; - DeadBlockSet.insert(BB); - } - - if (MSSAU) - MSSAU->removeBlocks(DeadBlockSet); - - // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references. Update DTU and LVI if available. - std::vector<DominatorTree::UpdateType> Updates; - for (auto *BB : DeadBlockSet) { - for (BasicBlock *Successor : successors(BB)) { - if (!DeadBlockSet.count(Successor)) - Successor->removePredecessor(BB); - if (DTU) - Updates.push_back({DominatorTree::Delete, BB, Successor}); - } - if (LVI) - LVI->eraseBlock(BB); - BB->dropAllReferences(); - } - for (Function::iterator I = ++F.begin(); I != F.end();) { - auto *BB = &*I; - if (Reachable.count(BB)) { - ++I; - continue; - } - if (DTU) { - // Remove the terminator of BB to clear the successor list of BB. 
- if (BB->getTerminator()) - BB->getInstList().pop_back(); - new UnreachableInst(BB->getContext(), BB); - assert(succ_empty(BB) && "The successor list of BB isn't empty before " - "applying corresponding DTU updates."); - ++I; - } else { - I = F.getBasicBlockList().erase(I); - } - } - - if (DTU) { - DTU->applyUpdatesPermissive(Updates); - bool Deleted = false; - for (auto *BB : DeadBlockSet) { - if (DTU->isBBPendingDeletion(BB)) - --NumRemoved; - else - Deleted = true; - DTU->deleteBB(BB); - } - if (!Deleted) - return false; - } - return true; -} - -void llvm::combineMetadata(Instruction *K, const Instruction *J, - ArrayRef<unsigned> KnownIDs, bool DoesKMove) { - SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; - K->dropUnknownNonDebugMetadata(KnownIDs); - K->getAllMetadataOtherThanDebugLoc(Metadata); - for (const auto &MD : Metadata) { - unsigned Kind = MD.first; - MDNode *JMD = J->getMetadata(Kind); - MDNode *KMD = MD.second; - - switch (Kind) { - default: - K->setMetadata(Kind, nullptr); // Remove unknown metadata - break; - case LLVMContext::MD_dbg: - llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); - case LLVMContext::MD_tbaa: - K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); - break; - case LLVMContext::MD_alias_scope: - K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD)); - break; - case LLVMContext::MD_noalias: - case LLVMContext::MD_mem_parallel_loop_access: - K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); - break; - case LLVMContext::MD_access_group: - K->setMetadata(LLVMContext::MD_access_group, - intersectAccessGroups(K, J)); - break; - case LLVMContext::MD_range: - - // If K does move, use most generic range. Otherwise keep the range of - // K. - if (DoesKMove) - // FIXME: If K does move, we should drop the range info and nonnull. - // Currently this function is used with DoesKMove in passes - // doing hoisting/sinking and the current behavior of using the - // most generic range is correct in those cases. - K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD)); - break; - case LLVMContext::MD_fpmath: - K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); - break; - case LLVMContext::MD_invariant_load: - // Only set the !invariant.load if it is present in both instructions. - K->setMetadata(Kind, JMD); - break; - case LLVMContext::MD_nonnull: - // If K does move, keep nonnull if it is present in both instructions. - if (DoesKMove) - K->setMetadata(Kind, JMD); - break; - case LLVMContext::MD_invariant_group: - // Preserve !invariant.group in K. - break; - case LLVMContext::MD_align: - K->setMetadata(Kind, - MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); - break; - case LLVMContext::MD_dereferenceable: - case LLVMContext::MD_dereferenceable_or_null: - K->setMetadata(Kind, - MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); - break; - } - } - // Set !invariant.group from J if J has it. If both instructions have it - // then we will just pick it from J - even when they are different. - // Also make sure that K is a load or store - e.g. combining bitcast with load - // could produce bitcast with invariant.group metadata, which is invalid. - // FIXME: we should try to preserve both invariant.group metadata if they are - // different, but right now an instruction can only have one invariant.group.
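// A sketch of the MDNode::intersect merge used for !noalias above: only
// scopes present on both instructions survive. Integer IDs stand in for
// metadata nodes.
#include <algorithm>
#include <iterator>
#include <vector>

static std::vector<int> intersectScopesSketch(std::vector<int> A,
                                              std::vector<int> B) {
  std::sort(A.begin(), A.end());
  std::sort(B.begin(), B.end());
  std::vector<int> Out;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::back_inserter(Out));
  return Out;
}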
- if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) - if (isa<LoadInst>(K) || isa<StoreInst>(K)) - K->setMetadata(LLVMContext::MD_invariant_group, JMD); -} - -void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, - bool KDominatesJ) { - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_access_group}; - combineMetadata(K, J, KnownIDs, KDominatesJ); -} - -void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { - auto *ReplInst = dyn_cast<Instruction>(Repl); - if (!ReplInst) - return; - - // Patch the replacement so that it is not more restrictive than the value - // being replaced. - // Note that if 'I' is a load being replaced by some operation, - // for example, by an arithmetic operation, then andIRFlags() - // would just erase all math flags from the original arithmetic - // operation, which is clearly not wanted and not needed. - if (!isa<LoadInst>(I)) - ReplInst->andIRFlags(I); - - // FIXME: If both the original and replacement value are part of the - // same control-flow region (meaning that the execution of one - // guarantees the execution of the other), then we can combine the - // noalias scopes here and do better than the general conservative - // answer used in combineMetadata(). - - // In general, GVN unifies expressions over different control-flow - // regions, and so we need a conservative combination of the noalias - // scopes. - static const unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull, - LLVMContext::MD_access_group}; - combineMetadata(ReplInst, I, KnownIDs, false); -} - -template <typename RootType, typename DominatesFn> -static unsigned replaceDominatedUsesWith(Value *From, Value *To, - const RootType &Root, - const DominatesFn &Dominates) { - assert(From->getType() == To->getType()); - - unsigned Count = 0; - for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); - UI != UE;) { - Use &U = *UI++; - if (!Dominates(Root, U)) - continue; - U.set(To); - LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName() - << "' as " << *To << " in " << *U << "\n"); - ++Count; - } - return Count; -} - -unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { - assert(From->getType() == To->getType()); - auto *BB = From->getParent(); - unsigned Count = 0; - - for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); - UI != UE;) { - Use &U = *UI++; - auto *I = cast<Instruction>(U.getUser()); - if (I->getParent() == BB) - continue; - U.set(To); - ++Count; - } - return Count; -} - -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlockEdge &Root) { - auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) { - return DT.dominates(Root, U); - }; - return ::replaceDominatedUsesWith(From, To, Root, Dominates); -} - -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlock *BB) { - auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) { - auto *I = cast<Instruction>(U.getUser())->getParent(); - 
return DT.properlyDominates(BB, I); - }; - return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); -} - -bool llvm::callsGCLeafFunction(const CallBase *Call, - const TargetLibraryInfo &TLI) { - // Check if the function is specifically marked as a gc leaf function. - if (Call->hasFnAttr("gc-leaf-function")) - return true; - if (const Function *F = Call->getCalledFunction()) { - if (F->hasFnAttribute("gc-leaf-function")) - return true; - - if (auto IID = F->getIntrinsicID()) - // Most LLVM intrinsics do not take safepoints. - return IID != Intrinsic::experimental_gc_statepoint && - IID != Intrinsic::experimental_deoptimize; - } - - // Lib calls can be materialized by some passes, and won't be - // marked as 'gc-leaf-function.' All available Libcalls are - // GC-leaf. - LibFunc LF; - if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) { - return TLI.has(LF); - } - - return false; -} - -void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, - LoadInst &NewLI) { - auto *NewTy = NewLI.getType(); - - // This only directly applies if the new type is also a pointer. - if (NewTy->isPointerTy()) { - NewLI.setMetadata(LLVMContext::MD_nonnull, N); - return; - } - - // The only other translation we can do is to integral loads with !range - // metadata. - if (!NewTy->isIntegerTy()) - return; - - MDBuilder MDB(NewLI.getContext()); - const Value *Ptr = OldLI.getPointerOperand(); - auto *ITy = cast<IntegerType>(NewTy); - auto *NullInt = ConstantExpr::getPtrToInt( - ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy); - auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); - NewLI.setMetadata(LLVMContext::MD_range, - MDB.createRange(NonNullInt, NullInt)); -} - -void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, - MDNode *N, LoadInst &NewLI) { - auto *NewTy = NewLI.getType(); - - // Give up unless it is converted to a pointer where there is a single very - // valuable mapping we can do reliably. - // FIXME: It would be nice to propagate this in more ways, but the type - // conversions make it hard. - if (!NewTy->isPointerTy()) - return; - - unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy); - if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(OldLI.getContext(), None); - NewLI.setMetadata(LLVMContext::MD_nonnull, NN); - } -} - -void llvm::dropDebugUsers(Instruction &I) { - SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, &I); - for (auto *DII : DbgUsers) - DII->eraseFromParent(); -} - -void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, - BasicBlock *BB) { - // Since we are moving the instructions out of its basic block, we do not - // retain their original debug locations (DILocations) and debug intrinsic - // instructions. - // - // Doing so would degrade the debugging experience and adversely affect the - // accuracy of profiling information. - // - // Currently, when hoisting the instructions, we take the following actions: - // - Remove their debug intrinsic instructions. - // - Set their debug locations to the values from the insertion point. - // - // As per PR39141 (comment #8), the more fundamental reason why the dbg.values - // need to be deleted, is because there will not be any instructions with a - // DILocation in either branch left after performing the transformation. We - // can only insert a dbg.value after the two branches are joined again. - // - // See PR38762, PR39243 for more details. 
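// copyNonnullMetadata above encodes "not null" on an integer load as the
// wrapping half-open range [1, 0), i.e. every value except zero. A sketch of
// the membership test for such a wrapping range (assumes Lo != Hi):
#include <cstdint>

static bool wrappingRangeContains(uint64_t Lo, uint64_t Hi, uint64_t V) {
  if (Lo < Hi)
    return Lo <= V && V < Hi; // ordinary [Lo, Hi)
  return V >= Lo || V < Hi;   // wraps past the max value; [1, 0) == "V != 0"
}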
- // - // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to - // encode predicated DIExpressions that yield different results on different - // code paths. - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { - Instruction *I = &*II; - I->dropUnknownNonDebugMetadata(); - if (I->isUsedByMetadata()) - dropDebugUsers(*I); - if (isa<DbgInfoIntrinsic>(I)) { - // Remove DbgInfo Intrinsics. - II = I->eraseFromParent(); - continue; - } - I->setDebugLoc(InsertPt->getDebugLoc()); - ++II; - } - DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(), - BB->begin(), - BB->getTerminator()->getIterator()); -} - -namespace { - -/// A potential constituent of a bitreverse or bswap expression. See -/// collectBitParts for a fuller explanation. -struct BitPart { - BitPart(Value *P, unsigned BW) : Provider(P) { - Provenance.resize(BW); - } - - /// The Value that this is a bitreverse/bswap of. - Value *Provider; - - /// The "provenance" of each bit. Provenance[A] = B means that bit A of the - /// result of this expression was originally bit B of Provider. - SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. - - enum { Unset = -1 }; -}; - -} // end anonymous namespace - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// bitnumber to bitnumber. It is the caller's responsibility to validate that -/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted and a -/// BitPart is returned with Provider set to %X and Provenance[24-31] set to -/// [0-7]. -/// -/// To avoid revisiting values, the BitPart results are memoized into the -/// provided map. To avoid unnecessary copying of BitParts, BitParts are -/// constructed in-place in the \c BPS map. Because of this \c BPS needs to -/// store BitParts objects, not pointers. As we need the concept of a nullptr -/// BitParts (Value has been analyzed and the analysis failed), we use an -/// Optional type instead to provide the same functionality. -/// -/// Because we pass around references into \c BPS, we must use a container that -/// does not invalidate internal references (std::map instead of DenseMap). -static const Optional<BitPart> & -collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, - std::map<Value *, Optional<BitPart>> &BPS, int Depth) { - auto I = BPS.find(V); - if (I != BPS.end()) - return I->second; - - auto &Result = BPS[V] = None; - auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - - // Prevent stack overflow by limiting the recursion depth. - if (Depth == BitPartRecursionMaxDepth) { - LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n"); - return Result; - } - - if (Instruction *I = dyn_cast<Instruction>(V)) { - // If this is an or instruction, it may be an inner node of the bswap.
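// A sketch of the OR-merge rule implemented just below: both operands may
// supply any given result bit, but they must never disagree about its
// source; Unset (-1) entries defer to the other side. Assumes A and B have
// the same size.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

static std::optional<std::vector<int8_t>>
mergeProvenanceSketch(const std::vector<int8_t> &A,
                      const std::vector<int8_t> &B) {
  const int8_t Unset = -1;
  std::vector<int8_t> Out(A.size());
  for (std::size_t i = 0; i < A.size(); ++i) {
    if (A[i] != Unset && B[i] != Unset && A[i] != B[i])
      return std::nullopt; // conflicting sources: not part of a bswap
    Out[i] = (A[i] == Unset) ? B[i] : A[i];
  }
  return Out;
}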
- if (I->getOpcode() == Instruction::Or) { - auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - if (!A || !B) - return Result; - - // Try and merge the two together. - if (!A->Provider || A->Provider != B->Provider) - return Result; - - Result = BitPart(A->Provider, BitWidth); - for (unsigned i = 0; i < A->Provenance.size(); ++i) { - if (A->Provenance[i] != BitPart::Unset && - B->Provenance[i] != BitPart::Unset && - A->Provenance[i] != B->Provenance[i]) - return Result = None; - - if (A->Provenance[i] == BitPart::Unset) - Result->Provenance[i] = B->Provenance[i]; - else - Result->Provenance[i] = A->Provenance[i]; - } - - return Result; - } - - // If this is a logical shift by a constant, recurse then shift the result. - if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { - unsigned BitShift = - cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); - // Ensure the shift amount is defined. - if (BitShift > BitWidth) - return Result; - - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - Result = Res; - - // Perform the "shift" on BitProvenance. - auto &P = Result->Provenance; - if (I->getOpcode() == Instruction::Shl) { - P.erase(std::prev(P.end(), BitShift), P.end()); - P.insert(P.begin(), BitShift, BitPart::Unset); - } else { - P.erase(P.begin(), std::next(P.begin(), BitShift)); - P.insert(P.end(), BitShift, BitPart::Unset); - } - - return Result; - } - - // If this is a logical 'and' with a mask that clears bits, recurse then - // unset the appropriate bits. - if (I->getOpcode() == Instruction::And && - isa<ConstantInt>(I->getOperand(1))) { - APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); - const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - - // Check that the mask allows a multiple of 8 bits for a bswap, for an - // early exit. - unsigned NumMaskedBits = AndMask.countPopulation(); - if (!MatchBitReversals && NumMaskedBits % 8 != 0) - return Result; - - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - Result = Res; - - for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) - // If the AndMask is zero for this bit, clear the bit. - if ((AndMask & Bit) == 0) - Result->Provenance[i] = BitPart::Unset; - return Result; - } - - // If this is a zext instruction zero extend the result. - if (I->getOpcode() == Instruction::ZExt) { - auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS, Depth + 1); - if (!Res) - return Result; - - Result = BitPart(Res->Provider, BitWidth); - auto NarrowBitWidth = - cast<IntegerType>(cast<ZExtInst>(I)->getSrcTy())->getBitWidth(); - for (unsigned i = 0; i < NarrowBitWidth; ++i) - Result->Provenance[i] = Res->Provenance[i]; - for (unsigned i = NarrowBitWidth; i < BitWidth; ++i) - Result->Provenance[i] = BitPart::Unset; - return Result; - } - } - - // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap/bitreverse. - Result = BitPart(V, BitWidth); - for (unsigned i = 0; i < BitWidth; ++i) - Result->Provenance[i] = i; - return Result; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, - unsigned BitWidth) { - if (From % 8 != To % 8) - return false; - // Convert from bit indices to byte indices and check for a byte reversal. 
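// A sketch of the provenance update for a logical shift, as done above: shl
// drops the top entries and prepends Unset for the newly created zero bits;
// lshr is the mirror image. Assumes Amt <= P.size().
#include <cstdint>
#include <vector>

static void shiftProvenanceSketch(std::vector<int8_t> &P, unsigned Amt,
                                  bool IsShl) {
  const int8_t Unset = -1;
  if (IsShl) {
    P.erase(P.end() - Amt, P.end());
    P.insert(P.begin(), Amt, Unset);
  } else {
    P.erase(P.begin(), P.begin() + Amt);
    P.insert(P.end(), Amt, Unset);
  }
}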
- From >>= 3; - To >>= 3; - BitWidth >>= 3; - return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, - unsigned BitWidth) { - return From == BitWidth - To - 1; -} - -bool llvm::recognizeBSwapOrBitReverseIdiom( - Instruction *I, bool MatchBSwaps, bool MatchBitReversals, - SmallVectorImpl<Instruction *> &InsertedInsts) { - if (Operator::getOpcode(I) != Instruction::Or) - return false; - if (!MatchBSwaps && !MatchBitReversals) - return false; - IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); - if (!ITy || ITy->getBitWidth() > 128) - return false; // Can't do vectors or integers > 128 bits. - unsigned BW = ITy->getBitWidth(); - - unsigned DemandedBW = BW; - IntegerType *DemandedTy = ITy; - if (I->hasOneUse()) { - if (TruncInst *Trunc = dyn_cast<TruncInst>(I->user_back())) { - DemandedTy = cast<IntegerType>(Trunc->getType()); - DemandedBW = DemandedTy->getBitWidth(); - } - } - - // Try to find all the pieces corresponding to the bswap. - std::map<Value *, Optional<BitPart>> BPS; - auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); - if (!Res) - return false; - auto &BitProvenance = Res->Provenance; - - // Now, is the bit permutation correct for a bswap or a bitreverse? We can - // only byteswap values with an even number of bytes. - bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; - for (unsigned i = 0; i < DemandedBW; ++i) { - OKForBSwap &= - bitTransformIsCorrectForBSwap(BitProvenance[i], i, DemandedBW); - OKForBitReverse &= - bitTransformIsCorrectForBitReverse(BitProvenance[i], i, DemandedBW); - } - - Intrinsic::ID Intrin; - if (OKForBSwap && MatchBSwaps) - Intrin = Intrinsic::bswap; - else if (OKForBitReverse && MatchBitReversals) - Intrin = Intrinsic::bitreverse; - else - return false; - - if (ITy != DemandedTy) { - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, DemandedTy); - Value *Provider = Res->Provider; - IntegerType *ProviderTy = cast<IntegerType>(Provider->getType()); - // We may need to truncate the provider. - if (DemandedTy != ProviderTy) { - auto *Trunc = CastInst::Create(Instruction::Trunc, Provider, DemandedTy, - "trunc", I); - InsertedInsts.push_back(Trunc); - Provider = Trunc; - } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); - InsertedInsts.push_back(ExtInst); - return true; - } - - Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); - InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); - return true; -} - -// CodeGen has special handling for some string functions that may replace -// them with target-specific intrinsics. Since that'd skip our interceptors -// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses, -// we mark affected calls as NoBuiltin, which will disable optimization -// in CodeGen. -void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( - CallInst *CI, const TargetLibraryInfo *TLI) { - Function *F = CI->getCalledFunction(); - LibFunc Func; - if (F && !F->hasLocalLinkage() && F->hasName() && - TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) && - !F->doesNotAccessMemory()) - CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin); -} - -bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { - // We can't have a PHI with a metadata type. 
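// A worked example for the byte-reversal test above: a 16-bit rotate-by-8,
// (x >> 8) | (x << 8), gives result bit i its value from source bit
// (i + 8) mod 16, which the check accepts as a bswap.
#include <cassert>

static bool isByteReversalSketch(unsigned From, unsigned To,
                                 unsigned BitWidth) {
  if (From % 8 != To % 8)        // must keep the same position within a byte
    return false;
  return From / 8 == BitWidth / 8 - To / 8 - 1; // bytes mirror each other
}

int main() {
  for (unsigned i = 0; i < 16; ++i)
    assert(isByteReversalSketch((i + 8) % 16, i, 16));
  return 0;
}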
- if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa<Constant>(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // Can't handle inline asm. Skip it. - if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue())) - return false; - // Many arithmetic intrinsics have no issue taking a - // variable, but it's hard to distinguish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa<IntrinsicInst>(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - return true; - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::Switch: - case Instruction::ExtractValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::InsertValue: - // All operands apart from the first and the second are constant. - return OpIdx < 2; - case Instruction::Alloca: - // Static allocas (constant size in the entry block) are handled by - // prologue/epilogue insertion so they're free anyway. We definitely don't - // want to make them non-constant. - return !cast<AllocaInst>(I)->isStaticAlloca(); - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = gep_type_begin(I); - for (auto E = std::next(It, OpIdx); It != E; ++It) - if (It.isStruct()) - return false; - return true; - } -} - -using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>; -AllocaInst *llvm::findAllocaForValue(Value *V, - AllocaForValueMapTy &AllocaForValue) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return AI; - // See if we've already calculated (or started to calculate) alloca for a - // given value. - AllocaForValueMapTy::iterator I = AllocaForValue.find(V); - if (I != AllocaForValue.end()) - return I->second; - // Store 0 while we're calculating alloca for value V to avoid - // infinite recursion if the value references itself. - AllocaForValue[V] = nullptr; - AllocaInst *Res = nullptr; - if (CastInst *CI = dyn_cast<CastInst>(V)) - Res = findAllocaForValue(CI->getOperand(0), AllocaForValue); - else if (PHINode *PN = dyn_cast<PHINode>(V)) { - for (Value *IncValue : PN->incoming_values()) { - // Allow self-referencing phi-nodes. - if (IncValue == PN) - continue; - AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue); - // AI for incoming values should exist and should all be equal. - if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res)) - return nullptr; - Res = IncValueAI; - } - } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) { - Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue); - } else { - LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: " - << *V << "\n"); - } - if (Res) - AllocaForValue[V] = Res; - return Res; -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp deleted file mode 100644 index 37389a695b45..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ /dev/null @@ -1,690 +0,0 @@ -//===----------------- LoopRotationUtils.cpp -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides utilities to convert a loop into a loop with a bottom test. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LoopRotationUtils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -using namespace llvm; - -#define DEBUG_TYPE "loop-rotate" - -STATISTIC(NumRotated, "Number of loops rotated"); - -namespace { -/// A simple loop rotation transformation. -class LoopRotate { - const unsigned MaxHeaderSize; - LoopInfo *LI; - const TargetTransformInfo *TTI; - AssumptionCache *AC; - DominatorTree *DT; - ScalarEvolution *SE; - MemorySSAUpdater *MSSAU; - const SimplifyQuery &SQ; - bool RotationOnly; - bool IsUtilMode; - -public: - LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, - const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), - MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), - IsUtilMode(IsUtilMode) {} - bool processLoop(Loop *L); - -private: - bool rotateLoop(Loop *L, bool SimplifiedLatch); - bool simplifyLoopLatch(Loop *L); -}; -} // end anonymous namespace - -/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the -/// old header into the preheader. If there were uses of the values produced by -/// these instructions that were outside of the loop, we have to insert PHI nodes -/// to merge the two values. Do this now. -static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, - BasicBlock *OrigPreheader, - ValueToValueMapTy &ValueMap, - SmallVectorImpl<PHINode*> *InsertedPHIs) { - // Remove PHI node entries that are no longer live. - BasicBlock::iterator I, E = OrigHeader->end(); - for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); - - // Now fix up users of the instructions in OrigHeader, inserting PHI nodes - // as necessary. - SSAUpdater SSA(InsertedPHIs); - for (I = OrigHeader->begin(); I != E; ++I) { - Value *OrigHeaderVal = &*I; - - // If there are no uses of the value (e.g.
because it returns void), there - // is nothing to rewrite. - if (OrigHeaderVal->use_empty()) - continue; - - Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); - - // The value now exists in two versions: the initial value in the preheader - // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); - SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); - SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); - - // Visit each use of the OrigHeader instruction. - for (Value::use_iterator UI = OrigHeaderVal->use_begin(), - UE = OrigHeaderVal->use_end(); - UI != UE;) { - // Grab the use before incrementing the iterator. - Use &U = *UI; - - // Increment the iterator before removing the use from the list. - ++UI; - - // SSAUpdater can't handle a non-PHI use in the same block as an - // earlier def. We can easily handle those cases manually. - Instruction *UserInst = cast<Instruction>(U.getUser()); - if (!isa<PHINode>(UserInst)) { - BasicBlock *UserBB = UserInst->getParent(); - - // The original users in the OrigHeader are already using the - // original definitions. - if (UserBB == OrigHeader) - continue; - - // Users in the OrigPreHeader need to use the value to which the - // original definitions are mapped. - if (UserBB == OrigPreheader) { - U = OrigPreHeaderVal; - continue; - } - } - - // Anything else can be handled by SSAUpdater. - SSA.RewriteUse(U); - } - - // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug - // intrinsics. - SmallVector<DbgValueInst *, 1> DbgValues; - llvm::findDbgValues(DbgValues, OrigHeaderVal); - for (auto &DbgValue : DbgValues) { - // The original users in the OrigHeader are already using the original - // definitions. - BasicBlock *UserBB = DbgValue->getParent(); - if (UserBB == OrigHeader) - continue; - - // Users in the OrigPreHeader need to use the value to which the - // original definitions are mapped and anything else can be handled by - // the SSAUpdater. To avoid adding PHINodes, check if the value is - // available in UserBB; if not, substitute undef. - Value *NewVal; - if (UserBB == OrigPreheader) - NewVal = OrigPreHeaderVal; - else if (SSA.HasValueForBlock(UserBB)) - NewVal = SSA.GetValueInMiddleOfBlock(UserBB); - else - NewVal = UndefValue::get(OrigHeaderVal->getType()); - DbgValue->setOperand(0, - MetadataAsValue::get(OrigHeaderVal->getContext(), - ValueAsMetadata::get(NewVal))); - } - } -} - -// Look for a phi which is only used outside the loop (via an LCSSA phi) -// in the exit from the header. This means that rotating the loop can -// remove the phi. -static bool shouldRotateLoopExitingLatch(Loop *L) { - BasicBlock *Header = L->getHeader(); - BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0); - if (L->contains(HeaderExit)) - HeaderExit = Header->getTerminator()->getSuccessor(1); - - for (auto &Phi : Header->phis()) { - // Look for uses of this phi in the loop/via exits other than the header. - if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { - return cast<Instruction>(U)->getParent() != HeaderExit; - })) - continue; - return true; - } - - return false; -} - -/// Rotate loop L. Return true if the loop is rotated. -/// -/// \param SimplifiedLatch is true if the latch was just folded into the final -/// loop exit. In this case we may want to rotate even though the new latch is -/// now an exiting branch. This rotation would have happened had the latch not -/// been simplified.
However, if SimplifiedLatch is false, then we avoid -/// rotating loops in which the latch exits to avoid excessive or endless -/// rotation. LoopRotate should be repeatable and converge to a canonical -/// form. This property is satisfied because simplifying the loop latch can only -/// happen once across multiple invocations of the LoopRotate pass. -bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { - // If the loop has only one block then there is not much to rotate. - if (L->getBlocks().size() == 1) - return false; - - BasicBlock *OrigHeader = L->getHeader(); - BasicBlock *OrigLatch = L->getLoopLatch(); - - BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); - if (!BI || BI->isUnconditional()) - return false; - - // If the loop header is not one of the loop exiting blocks then - // either this loop is already rotated or it is not - // suitable for loop rotation transformations. - if (!L->isLoopExiting(OrigHeader)) - return false; - - // If the loop latch already contains a branch that leaves the loop then the - // loop is already rotated. - if (!OrigLatch) - return false; - - // Rotate if either the loop latch does *not* exit the loop, or if the loop - // latch was just simplified. Or if we think it will be profitable. - if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && - !shouldRotateLoopExitingLatch(L)) - return false; - - // Check size of original header and reject loop if it is very big or we can't - // duplicate blocks inside it. - { - SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); - if (Metrics.notDuplicatable) { - LLVM_DEBUG( - dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" - << " instructions: "; - L->dump()); - return false; - } - if (Metrics.convergent) { - LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " - "instructions: "; - L->dump()); - return false; - } - if (Metrics.NumInsts > MaxHeaderSize) - return false; - } - - // Now, this loop is suitable for rotation. - BasicBlock *OrigPreheader = L->getLoopPreheader(); - - // If the loop could not be converted to canonical form, it must have an - // indirectbr in it, just give up. - if (!OrigPreheader || !L->hasDedicatedExits()) - return false; - - // Anything ScalarEvolution may know about this loop or the PHI nodes - // in its header will soon be invalidated. We should also invalidate - // all outer loops because insertion and deletion of blocks that happens - // during the rotation may violate invariants related to backedge taken - // infos in them. - if (SE) - SE->forgetTopmostLoop(L); - - LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - // Find new Loop header. NewHeader is a Header's one and only successor - // that is inside loop. Header's other successor is outside the - // loop. Otherwise loop is not suitable for rotation. - BasicBlock *Exit = BI->getSuccessor(0); - BasicBlock *NewHeader = BI->getSuccessor(1); - if (L->contains(Exit)) - std::swap(Exit, NewHeader); - assert(NewHeader && "Unable to determine new loop header"); - assert(L->contains(NewHeader) && !L->contains(Exit) && - "Unable to determine loop header and exit blocks"); - - // This code assumes that the new header has exactly one predecessor. - // Remove any single-entry PHI nodes in it. 
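// A source-level picture of the transformation rotateLoop performs: the
// header's exit test is cloned into the preheader as a guard, and the loop
// becomes bottom-tested. Hypothetical example (not from the deleted file);
// the two functions compute the same result.
static int sumTopTested(const int *A, int N) {
  int S = 0;
  for (int i = 0; i < N; ++i) // test in the header, latch branches back
    S += A[i];
  return S;
}

static int sumRotated(const int *A, int N) {
  int S = 0;
  if (0 < N) {   // guard: the cloned header test in the new preheader
    int i = 0;
    do {         // rotated loop: the test now sits in the latch
      S += A[i];
      ++i;
    } while (i < N);
  }
  return S;
}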
- assert(NewHeader->getSinglePredecessor() && - "New header doesn't have one pred!"); - FoldSingleEntryPHINodes(NewHeader); - - // Begin by walking OrigHeader and populating ValueMap with an entry for - // each Instruction. - BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - ValueToValueMapTy ValueMap, ValueMapMSSA; - - // For PHI nodes, the value available in OldPreHeader is just the - // incoming value from OldPreHeader. - for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) - ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); - - // For the rest of the instructions, either hoist to the OrigPreheader if - // possible or create a clone in the OldPreHeader if not. - Instruction *LoopEntryBranch = OrigPreheader->getTerminator(); - - // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication. - using DbgIntrinsicHash = - std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>; - auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash { - return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()}; - }; - SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; - for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend(); - I != E; ++I) { - if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I)) - DbgIntrinsics.insert(makeHash(DII)); - else - break; - } - - while (I != E) { - Instruction *Inst = &*I++; - - // If the instruction's operands are invariant and it doesn't read or write - // memory, then it is safe to hoist. Doing this doesn't change the order of - // execution in the preheader, but does prevent the instruction from - // executing in each iteration of the loop. This means it is safe to hoist - // something that might trap, but isn't safe to hoist something that reads - // memory (without proving that the loop doesn't write). - if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && - !Inst->mayWriteToMemory() && !Inst->isTerminator() && - !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { - Inst->moveBefore(LoopEntryBranch); - continue; - } - - // Otherwise, create a duplicate of the instruction. - Instruction *C = Inst->clone(); - - // Eagerly remap the operands of the instruction. - RemapInstruction(C, ValueMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - - // Avoid inserting the same intrinsic twice. - if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C)) - if (DbgIntrinsics.count(makeHash(DII))) { - C->deleteValue(); - continue; - } - - // With the operands remapped, see if the instruction constant folds or is - // otherwise simplifiable. This commonly occurs because the entry from PHI - // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ); - if (V && LI->replacementPreservesLCSSAForm(C, V)) { - // If so, then delete the temporary instruction and stick the folded value - // in the map. - ValueMap[Inst] = V; - if (!C->mayHaveSideEffects()) { - C->deleteValue(); - C = nullptr; - } - } else { - ValueMap[Inst] = C; - } - if (C) { - // Otherwise, stick the new instruction into the new block! - C->setName(Inst->getName()); - C->insertBefore(LoopEntryBranch); - - if (auto *II = dyn_cast<IntrinsicInst>(C)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - // MemorySSA cares whether the cloned instruction was inserted or not, and - // not whether it can be remapped to a simplified value.
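// A sketch of the hoist-or-clone decision above, reduced to a predicate over
// a toy instruction summary (a hypothetical stand-in for queries on
// llvm::Instruction): an instruction may move to the preheader, instead of
// being cloned, only when its operands are loop-invariant and it neither
// reads nor writes memory; hoisting something that may trap is fine because
// the preheader executes exactly when the loop entry would.
struct InstSummary {
  bool OperandsLoopInvariant;
  bool ReadsMemory;
  bool WritesMemory;
  bool IsTerminator;
};

static bool canHoistInsteadOfCloneSketch(const InstSummary &I) {
  return I.OperandsLoopInvariant && !I.ReadsMemory && !I.WritesMemory &&
         !I.IsTerminator;
}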
- ValueMapMSSA[Inst] = C; - } - } - - // Along with all the other instructions, we just cloned OrigHeader's - // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's - // successors by duplicating their incoming values for OrigHeader. - for (BasicBlock *SuccBB : successors(OrigHeader)) - for (BasicBlock::iterator BI = SuccBB->begin(); - PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); - - // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove - // OrigPreHeader's old terminator (the original branch into the loop), and - // remove the corresponding incoming values from the PHI nodes in OrigHeader. - LoopEntryBranch->eraseFromParent(); - - // Update MemorySSA before the rewrite call below changes the 1:1 - // instruction:cloned_instruction_or_value mapping. - if (MSSAU) { - ValueMapMSSA[OrigHeader] = OrigPreheader; - MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, - ValueMapMSSA); - } - - SmallVector<PHINode*, 2> InsertedPHIs; - // If there were any uses of instructions in the duplicated block outside the - // loop, update them, inserting PHI nodes as required - RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, - &InsertedPHIs); - - // Attach dbg.value intrinsics to the new phis if that phi uses a value that - // previously had debug metadata attached. This keeps the debug info - // up-to-date in the loop body. - if (!InsertedPHIs.empty()) - insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); - - // NewHeader is now the header of the loop. - L->moveToHeader(NewHeader); - assert(L->getHeader() == NewHeader && "Latch block is our new header"); - - // Inform DT about changes to the CFG. - if (DT) { - // The OrigPreheader branches to the NewHeader and Exit now. Then, inform - // the DT about the removed edge to the OrigHeader (that got removed). - SmallVector<DominatorTree::UpdateType, 3> Updates; - Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); - Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); - Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); - DT->applyUpdates(Updates); - - if (MSSAU) { - MSSAU->applyUpdates(Updates, *DT); - if (VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - } - } - - // At this point, we've finished our major CFG changes. As part of cloning - // the loop into the preheader we've simplified instructions and the - // duplicated conditional branch may now be branching on a constant. If it is - // branching on a constant and if that constant means that we enter the loop, - // then we fold away the cond branch to an uncond branch. This simplifies the - // loop in cases important for nested loops, and it also means we don't have - // to split as many edges. - BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); - assert(PHBI->isConditional() && "Should be clone of BI condbr!"); - if (!isa<ConstantInt>(PHBI->getCondition()) || - PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != - NewHeader) { - // The conditional branch can't be folded, handle the general case. - // Split edges as necessary to preserve LoopSimplify form. - - // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and - // thus is not a preheader anymore. - // Split the edge to form a real preheader. 
- BasicBlock *NewPH = SplitCriticalEdge( - OrigPreheader, NewHeader, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); - NewPH->setName(NewHeader->getName() + ".lr.ph"); - - // Preserve canonical loop form, which means that 'Exit' should have only - // one predecessor. Note that Exit could be an exit block for multiple - // nested loops, causing both of the edges to now be critical and need to - // be split. - SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit)); - bool SplitLatchEdge = false; - for (BasicBlock *ExitPred : ExitPreds) { - // We only need to split loop exit edges. - Loop *PredLoop = LI->getLoopFor(ExitPred); - if (!PredLoop || PredLoop->contains(Exit) || - ExitPred->getTerminator()->isIndirectTerminator()) - continue; - SplitLatchEdge |= L->getLoopLatch() == ExitPred; - BasicBlock *ExitSplit = SplitCriticalEdge( - ExitPred, Exit, - CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); - ExitSplit->moveBefore(Exit); - } - assert(SplitLatchEdge && - "Despite splitting all preds, failed to split latch exit?"); - } else { - // We can fold the conditional branch in the preheader, this makes things - // simpler. The first step is to remove the extra edge to the Exit block. - Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); - NewBI->setDebugLoc(PHBI->getDebugLoc()); - PHBI->eraseFromParent(); - - // With our CFG finalized, update DomTree if it is available. - if (DT) DT->deleteEdge(OrigPreheader, Exit); - - // Update MSSA too, if available. - if (MSSAU) - MSSAU->removeEdge(OrigPreheader, Exit); - } - - assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); - assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - // Now that the CFG and DomTree are in a consistent state again, try to merge - // the OrigHeader block into OrigLatch. This will succeed if they are - // connected by an unconditional branch. This is just a cleanup so the - // emitted code isn't too gross in this common case. - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); - - ++NumRotated; - return true; -} - -/// Determine whether the instructions in this range may be safely and cheaply -/// speculated. This is not an important enough situation to develop complex -/// heuristics. We handle a single arithmetic instruction along with any type -/// conversions. -static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, - BasicBlock::iterator End, Loop *L) { - bool seenIncrement = false; - bool MultiExitLoop = false; - - if (!L->getExitingBlock()) - MultiExitLoop = true; - - for (BasicBlock::iterator I = Begin; I != End; ++I) { - - if (!isSafeToSpeculativelyExecute(&*I)) - return false; - - if (isa<DbgInfoIntrinsic>(I)) - continue; - - switch (I->getOpcode()) { - default: - return false; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. 
- if (!cast<GEPOperator>(I)->hasAllConstantIndices()) - return false; - // fall-thru to increment case - LLVM_FALLTHROUGH; - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: { - Value *IVOpnd = - !isa<Constant>(I->getOperand(0)) - ? I->getOperand(0) - : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; - if (!IVOpnd) - return false; - - // If increment operand is used outside of the loop, this speculation - // could cause extra live range interference. - if (MultiExitLoop) { - for (User *UseI : IVOpnd->users()) { - auto *UserInst = cast<Instruction>(UseI); - if (!L->contains(UserInst)) - return false; - } - } - - if (seenIncrement) - return false; - seenIncrement = true; - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // ignore type conversions - break; - } - } - return true; -} - -/// Fold the loop tail into the loop exit by speculating the loop tail -/// instructions. Typically, this is a single post-increment. In the case of a -/// simple 2-block loop, hoisting the increment can be much better than -/// duplicating the entire loop header. In the case of loops with early exits, -/// rotation will not work anyway, but simplifyLoopLatch will put the loop in -/// canonical form so downstream passes can handle it. -/// -/// I don't believe this invalidates SCEV. -bool LoopRotate::simplifyLoopLatch(Loop *L) { - BasicBlock *Latch = L->getLoopLatch(); - if (!Latch || Latch->hasAddressTaken()) - return false; - - BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); - if (!Jmp || !Jmp->isUnconditional()) - return false; - - BasicBlock *LastExit = Latch->getSinglePredecessor(); - if (!LastExit || !L->isLoopExiting(LastExit)) - return false; - - BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); - if (!BI) - return false; - - if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) - return false; - - LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " - << LastExit->getName() << "\n"); - - // Hoist the instructions from Latch into LastExit. - Instruction *FirstLatchInst = &*(Latch->begin()); - LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), - Latch->begin(), Jmp->getIterator()); - - // Update MemorySSA - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst); - - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); - - // Remove Latch from the CFG so that LastExit becomes the new Latch. - BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); - - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - return true; -} - -/// Rotate \c L, and return true if any modification was made. -bool LoopRotate::processLoop(Loop *L) { - // Save the loop metadata. - MDNode *LoopMD = L->getLoopID(); - - bool SimplifiedLatch = false; - - // Simplify the loop latch before attempting to rotate the header - // upward. Rotation may not be needed if the loop tail can be folded into the - // loop exit. 
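A hypothetical source-level picture of the latch folding performed by simplifyLoopLatch above (illustrative only; work() is an assumed side-effecting helper):

    extern void work(int);

    // Before: a two-block loop; the exiting block tests, the latch block
    // holds nothing but the post-increment.
    void beforeFold(int n) {
      int i = 0;
      for (;;) {
        work(i);
        if (i == n) break;  // exiting block
        ++i;                // latch: a single speculatable increment
      }
    }

    // After: the increment is spliced into the exiting block ahead of the
    // branch. It now also runs on the final iteration, which is safe
    // precisely because shouldSpeculateInstrs only admitted side-effect-free
    // instructions.
    void afterFold(int n) {
      int i = 0;
      for (;;) {
        work(i);
        bool done = (i == n);
        ++i;                // hoisted; executes once more than before
        if (done) break;    // the old latch block disappears
      }
    }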
- if (!RotationOnly) - SimplifiedLatch = simplifyLoopLatch(L); - - bool MadeChange = rotateLoop(L, SimplifiedLatch); - assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && - "Loop latch should be exiting after loop-rotate."); - - // Restore the loop metadata. - // NB! We presume LoopRotation DOESN'T ADD its own metadata. - if ((MadeChange || SimplifiedLatch) && LoopMD) - L->setLoopID(LoopMD); - - return MadeChange || SimplifiedLatch; -} - - -/// The utility to convert a loop into a loop with bottom test. -bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, - AssumptionCache *AC, DominatorTree *DT, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly = true, - unsigned Threshold = unsigned(-1), - bool IsUtilMode = true) { - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, - IsUtilMode); - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - return LR.processLoop(L); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp deleted file mode 100644 index 7e6da02d5707..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ /dev/null @@ -1,920 +0,0 @@ -//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass performs several transformations to transform natural loops into a -// simpler form, which makes subsequent analyses and transformations simpler and -// more effective. -// -// Loop pre-header insertion guarantees that there is a single, non-critical -// entry edge from outside of the loop to the loop header. This simplifies a -// number of analyses and transformations, such as LICM. -// -// Loop exit-block insertion guarantees that all exit blocks from the loop -// (blocks which are outside of the loop that have predecessors inside of the -// loop) only have predecessors from inside of the loop (and are thus dominated -// by the loop header). This simplifies transformations such as store-sinking -// that are built into LICM. -// -// This pass also guarantees that loops will have exactly one backedge. -// -// Indirectbr instructions introduce several complications. If the loop -// contains or is entered by an indirectbr instruction, it may not be possible -// to transform the loop and make these guarantees. Client code should check -// that these conditions are true before relying on them. -// -// Similar complications arise from callbr instructions, particularly in -// asm-goto where blockaddress expressions are used. -// -// Note that the simplifycfg pass will clean up blocks which are split out but -// end up being unnecessary, so usage of this pass should not pessimize -// generated code. -// -// This pass obviously modifies the CFG, but updates loop information and -// dominator information. 
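Before moving into LoopSimplify.cpp proper, a usage note on the llvm::LoopRotation entry point that closes the file above. A minimal sketch of calling it as a utility, assuming the caller already obtained the analyses from the usual wrapper passes or analysis managers; the function name rotateAsUtility, the Threshold of 16, and the include path are illustrative assumptions:

    #include "llvm/Transforms/Utils/LoopRotationUtils.h" // assumed declaration site
    // (analysis headers for Loop, LoopInfo, TTI, etc. assumed included)

    bool rotateAsUtility(llvm::Loop *L, llvm::LoopInfo *LI,
                         const llvm::TargetTransformInfo *TTI,
                         llvm::AssumptionCache *AC, llvm::DominatorTree *DT,
                         llvm::ScalarEvolution *SE,
                         llvm::MemorySSAUpdater *MSSAU,
                         const llvm::SimplifyQuery &SQ) {
      // RotationOnly=true skips simplifyLoopLatch, Threshold caps the header
      // size measured by CodeMetrics, and IsUtilMode=true bypasses the
      // profitability heuristic; a finite Threshold is used here for
      // illustration (the default above is unsigned(-1)).
      return llvm::LoopRotation(L, LI, TTI, AC, DT, SE, MSSAU, SQ,
                                /*RotationOnly=*/true, /*Threshold=*/16,
                                /*IsUtilMode=*/true);
    }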
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-simplify"
-
-STATISTIC(NumNested , "Number of nested loops split out");
-
-// If the new block isn't already well placed, move it to right after some
-// 'outside block' block. This prevents the preheader from being placed
-// inside the loop body, e.g. when the loop hasn't been rotated.
-static void placeSplitBlockCarefully(BasicBlock *NewBB,
-                                     SmallVectorImpl<BasicBlock *> &SplitPreds,
-                                     Loop *L) {
-  // Check to see if NewBB is already well placed.
-  Function::iterator BBI = --NewBB->getIterator();
-  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
-    if (&*BBI == SplitPreds[i])
-      return;
-  }
-
-  // If it isn't already after an outside block, move it after one. This is
-  // always good as it makes the uncond branch from the outside block into a
-  // fall-through.
-
-  // Figure out *which* outside block to put this after. Prefer an outside
-  // block that neighbors a BB actually in the loop.
-  BasicBlock *FoundBB = nullptr;
-  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
-    Function::iterator BBI = SplitPreds[i]->getIterator();
-    if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
-      FoundBB = SplitPreds[i];
-      break;
-    }
-  }
-
-  // If our heuristic for a *good* bb to place this after doesn't find
-  // anything, just pick something. It's likely better than leaving it within
-  // the loop.
-  if (!FoundBB)
-    FoundBB = SplitPreds[0];
-  NewBB->moveAfter(FoundBB);
-}
-
-/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
-/// preheader, this method is called to insert one. This method has two phases:
-/// preheader insertion and analysis updating.
-///
-BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
-                                         LoopInfo *LI, MemorySSAUpdater *MSSAU,
-                                         bool PreserveLCSSA) {
-  BasicBlock *Header = L->getHeader();
-
-  // Compute the set of predecessors of the loop that are not in the loop.
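// For example (hypothetical block names): if the header is reached from
// %entry and from the loop's own %latch, only %entry lands in OutsideBlocks;
// splitting those outside edges yields a single ".preheader" block that
// becomes the loop's unique entry from outside.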
- SmallVector<BasicBlock*, 8> OutsideBlocks; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (!L->contains(P)) { // Coming in from outside the loop? - // If the loop is branched to from an indirect terminator, we won't - // be able to fully transform the loop, because it prohibits - // edge splitting. - if (P->getTerminator()->isIndirectTerminator()) - return nullptr; - - // Keep track of it. - OutsideBlocks.push_back(P); - } - } - - // Split out the loop pre-header. - BasicBlock *PreheaderBB; - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, - LI, MSSAU, PreserveLCSSA); - if (!PreheaderBB) - return nullptr; - - LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " - << PreheaderBB->getName() << "\n"); - - // Make sure that NewBB is put someplace intelligent, which doesn't mess up - // code layout too horribly. - placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - - return PreheaderBB; -} - -/// Add the specified block, and all of its predecessors, to the specified set, -/// if it's not already in there. Stop predecessor traversal when we reach -/// StopBlock. -static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, - std::set<BasicBlock*> &Blocks) { - SmallVector<BasicBlock *, 8> Worklist; - Worklist.push_back(InputBB); - do { - BasicBlock *BB = Worklist.pop_back_val(); - if (Blocks.insert(BB).second && BB != StopBlock) - // If BB is not already processed and it is not a stop block then - // insert its predecessor in the work list - for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - BasicBlock *WBB = *I; - Worklist.push_back(WBB); - } - } while (!Worklist.empty()); -} - -/// The first part of loop-nestification is to find a PHI node that tells -/// us how to partition the loops. -static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, - AssumptionCache *AC) { - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { - PHINode *PN = cast<PHINode>(I); - ++I; - if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { - // This is a degenerate PHI already, don't modify it! - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - continue; - } - - // Scan this PHI node looking for a use of the PHI node by itself. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == PN && - L->contains(PN->getIncomingBlock(i))) - // We found something tasty to remove. - return PN; - } - return nullptr; -} - -/// If this loop has multiple backedges, try to pull one of them out into -/// a nested loop. -/// -/// This is important for code that looks like -/// this: -/// -/// Loop: -/// ... -/// br cond, Loop, Next -/// ... -/// br cond2, Loop, Out -/// -/// To identify this common case, we look at the PHI nodes in the header of the -/// loop. PHI nodes with unchanging values on one backedge correspond to values -/// that change in the "outer" loop, but not in the "inner" loop. -/// -/// If we are able to separate out a loop, return the new outer loop that was -/// created. -/// -static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, - DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, bool PreserveLCSSA, - AssumptionCache *AC, MemorySSAUpdater *MSSAU) { - // Don't try to separate loops without a preheader. 
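// A concrete (hypothetical) source-level picture of the split:
//
//   // before: one loop, two backedges into the same header
//   for (;;) { step(); if (cond()) continue;   // backedge #1
//              if (!cond2()) break; }          // fall-through: backedge #2
//
//   // after: the cond() backedge becomes a separated inner loop
//   do { do { step(); } while (cond()); } while (cond2());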
- if (!Preheader) - return nullptr; - - // The header is not a landing pad; preheader insertion should ensure this. - BasicBlock *Header = L->getHeader(); - assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - - PHINode *PN = findPHIToPartitionLoops(L, DT, AC); - if (!PN) return nullptr; // No known way to partition. - - // Pull out all predecessors that have varying values in the loop. This - // handles the case when a PHI node has multiple instances of itself as - // arguments. - SmallVector<BasicBlock*, 8> OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) - return nullptr; - OuterLoopPreds.push_back(PN->getIncomingBlock(i)); - } - } - LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); - - // If ScalarEvolution is around and knows anything about values in - // this loop, tell it to forget them, because we're about to - // substantially change it. - if (SE) - SE->forgetLoop(L); - - BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - DT, LI, MSSAU, PreserveLCSSA); - - // Make sure that NewBB is put someplace intelligent, which doesn't mess up - // code layout too horribly. - placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); - - // Create the new outer loop. - Loop *NewOuter = LI->AllocateLoop(); - - // Change the parent loop to use the outer loop as its child now. - if (Loop *Parent = L->getParentLoop()) - Parent->replaceChildLoopWith(L, NewOuter); - else - LI->changeTopLevelLoop(L, NewOuter); - - // L is now a subloop of our outer loop. - NewOuter->addChildLoop(L); - - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - NewOuter->addBlockEntry(*I); - - // Now reset the header in L, which had been moved by - // SplitBlockPredecessors for the outer loop. - L->moveToHeader(Header); - - // Determine which blocks should stay in L and which should be moved out to - // the Outer loop now. - std::set<BasicBlock*> BlocksInL; - for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { - BasicBlock *P = *PI; - if (DT->dominates(Header, P)) - addBlockAndPredsToSet(P, Header, BlocksInL); - } - - // Scan all of the loop children of L, moving them to OuterLoop if they are - // not part of the inner loop. - const std::vector<Loop*> &SubLoops = L->getSubLoops(); - for (size_t I = 0; I != SubLoops.size(); ) - if (BlocksInL.count(SubLoops[I]->getHeader())) - ++I; // Loop remains in L - else - NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); - - SmallVector<BasicBlock *, 8> OuterLoopBlocks; - OuterLoopBlocks.push_back(NewBB); - // Now that we know which blocks are in L and which need to be moved to - // OuterLoop, move any blocks that need it. - for (unsigned i = 0; i != L->getBlocks().size(); ++i) { - BasicBlock *BB = L->getBlocks()[i]; - if (!BlocksInL.count(BB)) { - // Move this block to the parent, updating the exit blocks sets - L->removeBlockFromLoop(BB); - if ((*LI)[BB] == L) { - LI->changeLoopFor(BB, NewOuter); - OuterLoopBlocks.push_back(BB); - } - --i; - } - } - - // Split edges to exit blocks from the inner loop, if they emerged in the - // process of separating the outer one. - formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA); - - if (PreserveLCSSA) { - // Fix LCSSA form for L. 
Some values, which previously were only used inside - // L, can now be used in NewOuter loop. We need to insert phi-nodes for them - // in corresponding exit blocks. - // We don't need to form LCSSA recursively, because there cannot be uses - // inside a newly created loop of defs from inner loops as those would - // already be a use of an LCSSA phi node. - formLCSSA(*L, *DT, LI, SE); - - assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && - "LCSSA is broken after separating nested loops!"); - } - - return NewOuter; -} - -/// This method is called when the specified loop has more than one -/// backedge in it. -/// -/// If this occurs, revector all of these backedges to target a new basic block -/// and have that block branch to the loop header. This ensures that loops -/// have exactly one backedge. -static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU) { - assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); - - // Get information about the loop - BasicBlock *Header = L->getHeader(); - Function *F = Header->getParent(); - - // Unique backedge insertion currently depends on having a preheader. - if (!Preheader) - return nullptr; - - // The header is not an EH pad; preheader insertion should ensure this. - assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - - // Figure out which basic blocks contain back-edges to the loop header. - std::vector<BasicBlock*> BackedgeBlocks; - for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ - BasicBlock *P = *I; - - // Indirect edges cannot be split, so we must fail if we find one. - if (P->getTerminator()->isIndirectTerminator()) - return nullptr; - - if (P != Preheader) BackedgeBlocks.push_back(P); - } - - // Create and insert the new backedge block... - BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), - Header->getName() + ".backedge", F); - BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); - BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); - - LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " - << BEBlock->getName() << "\n"); - - // Move the new backedge block to right after the last backedge block. - Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); - F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); - - // Now that the block has been inserted into the function, create PHI nodes in - // the backedge block which correspond to any PHI nodes in the header block. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), - PN->getName()+".be", BETerminator); - - // Loop over the PHI node, moving all entries except the one for the - // preheader over to the new PHI node. 
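// Concretely (hypothetical names): with backedge blocks %bb1 and %bb2,
//   before:  %iv    = phi [ %start, %preheader ], [ %v1, %bb1 ], [ %v2, %bb2 ]
//   after:   %iv    = phi [ %start, %preheader ], [ %iv.be, %backedge ]
//            %iv.be = phi [ %v1, %bb1 ], [ %v2, %bb2 ]   ; in the new block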
- unsigned PreheaderIdx = ~0U; - bool HasUniqueIncomingValue = true; - Value *UniqueValue = nullptr; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *IBB = PN->getIncomingBlock(i); - Value *IV = PN->getIncomingValue(i); - if (IBB == Preheader) { - PreheaderIdx = i; - } else { - NewPN->addIncoming(IV, IBB); - if (HasUniqueIncomingValue) { - if (!UniqueValue) - UniqueValue = IV; - else if (UniqueValue != IV) - HasUniqueIncomingValue = false; - } - } - } - - // Delete all of the incoming values from the old PN except the preheader's - assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); - if (PreheaderIdx != 0) { - PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); - PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); - } - // Nuke all entries except the zero'th. - for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) - PN->removeIncomingValue(e-i, false); - - // Finally, add the newly constructed PHI node as the entry for the BEBlock. - PN->addIncoming(NewPN, BEBlock); - - // As an optimization, if all incoming values in the new PhiNode (which is a - // subset of the incoming values of the old PHI node) have the same value, - // eliminate the PHI Node. - if (HasUniqueIncomingValue) { - NewPN->replaceAllUsesWith(UniqueValue); - BEBlock->getInstList().erase(NewPN); - } - } - - // Now that all of the PHI nodes have been inserted and adjusted, modify the - // backedge blocks to jump to the BEBlock instead of the header. - // If one of the backedges has llvm.loop metadata attached, we remove - // it from the backedge and add it to BEBlock. - unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop"); - MDNode *LoopMD = nullptr; - for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { - Instruction *TI = BackedgeBlocks[i]->getTerminator(); - if (!LoopMD) - LoopMD = TI->getMetadata(LoopMDKind); - TI->setMetadata(LoopMDKind, nullptr); - TI->replaceSuccessorWith(Header, BEBlock); - } - BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD); - - //===--- Update all analyses which we must preserve now -----------------===// - - // Update Loop Information - we know that this block is now in the current - // loop and all parent loops. - L->addBasicBlockToLoop(BEBlock, *LI); - - // Update dominator information - DT->splitBlock(BEBlock); - - if (MSSAU) - MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader, - BEBlock); - - return BEBlock; -} - -/// Simplify one loop and queue further loops for simplification. -static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, - DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, AssumptionCache *AC, - MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { - bool Changed = false; - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - -ReprocessLoop: - - // Check to see that no blocks (other than the header) in this loop have - // predecessors that are not in the loop. This is not valid for natural - // loops, but can occur if the blocks are unreachable. Since they are - // unreachable we can just shamelessly delete those CFG edges! - for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); - BB != E; ++BB) { - if (*BB == L->getHeader()) continue; - - SmallPtrSet<BasicBlock*, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), - PE = pred_end(*BB); PI != PE; ++PI) { - BasicBlock *P = *PI; - if (!L->contains(P)) - BadPreds.insert(P); - } - - // Delete each unique out-of-loop (and thus dead) predecessor. 
- for (BasicBlock *P : BadPreds) { - - LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " - << P->getName() << "\n"); - - // Zap the dead pred's terminator and replace it with unreachable. - Instruction *TI = P->getTerminator(); - changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA, - /*DTU=*/nullptr, MSSAU); - Changed = true; - } - } - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - // If there are exiting blocks with branches on undef, resolve the undef in - // the direction which will exit the loop. This will help simplify loop - // trip count computations. - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (BasicBlock *ExitingBlock : ExitingBlocks) - if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) - if (BI->isConditional()) { - if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { - - LLVM_DEBUG(dbgs() - << "LoopSimplify: Resolving \"br i1 undef\" to exit in " - << ExitingBlock->getName() << "\n"); - - BI->setCondition(ConstantInt::get(Cond->getType(), - !L->contains(BI->getSuccessor(0)))); - - Changed = true; - } - } - - // Does the loop already have a preheader? If so, don't insert one. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA); - if (Preheader) - Changed = true; - } - - // Next, check to make sure that all exit nodes of the loop only have - // predecessors that are inside of the loop. This check guarantees that the - // loop preheader/header will dominate the exit blocks. If the exit block has - // predecessors from outside of the loop, split the edge now. - if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA)) - Changed = true; - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - // If the header has more than two predecessors at this point (from the - // preheader and from multiple backedges), we must adjust the loop. - BasicBlock *LoopLatch = L->getLoopLatch(); - if (!LoopLatch) { - // If this is really a nested loop, rip it out into a child loop. Don't do - // this for loops with a giant number of backedges, just factor them into a - // common backedge instead. - if (L->getNumBackEdges() < 8) { - if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, - PreserveLCSSA, AC, MSSAU)) { - ++NumNested; - // Enqueue the outer loop as it should be processed next in our - // depth-first nest walk. - Worklist.push_back(OuterL); - - // This is a big restructuring change, reprocess the whole loop. - Changed = true; - // GCC doesn't tail recursion eliminate this. - // FIXME: It isn't clear we can't rely on LLVM to TRE this. - goto ReprocessLoop; - } - } - - // If we either couldn't, or didn't want to, identify nesting of the loops, - // insert a new block that all backedges target, then make it jump to the - // loop header. - LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU); - if (LoopLatch) - Changed = true; - } - - if (MSSAU && VerifyMemorySSA) - MSSAU->getMemorySSA()->verifyMemorySSA(); - - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - - // Scan over the PHI nodes in the loop header. Since they now have only two - // incoming values (the loop is canonicalized), we may have simplified the PHI - // down to 'X = phi [X, Y]', which should be replaced with 'Y'. 
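// For instance (hypothetical IR): once the loop has a unique latch, a header
// phi of the form
//   %x = phi [ %x, %latch ], [ %y, %preheader ]
// can only ever observe %y, so SimplifyInstruction folds it to %y and the phi
// is erased below.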
- PHINode *PN; - for (BasicBlock::iterator I = L->getHeader()->begin(); - (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { - if (SE) SE->forgetValue(PN); - if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - } - } - - // If this loop has multiple exits and the exits all go to the same - // block, attempt to merge the exits. This helps several passes, such - // as LoopRotation, which do not support loops with multiple exits. - // SimplifyCFG also does this (and this code uses the same utility - // function), however this code is loop-aware, where SimplifyCFG is - // not. That gives it the advantage of being able to hoist - // loop-invariant instructions out of the way to open up more - // opportunities, and the disadvantage of having the responsibility - // to preserve dominator information. - auto HasUniqueExitBlock = [&]() { - BasicBlock *UniqueExit = nullptr; - for (auto *ExitingBB : ExitingBlocks) - for (auto *SuccBB : successors(ExitingBB)) { - if (L->contains(SuccBB)) - continue; - - if (!UniqueExit) - UniqueExit = SuccBB; - else if (UniqueExit != SuccBB) - return false; - } - - return true; - }; - if (HasUniqueExitBlock()) { - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (!ExitingBlock->getSinglePredecessor()) continue; - BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!BI || !BI->isConditional()) continue; - CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); - if (!CI || CI->getParent() != ExitingBlock) continue; - - // Attempt to hoist out all instructions except for the - // comparison and the branch. - bool AllInvariant = true; - bool AnyInvariant = false; - for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) { - Instruction *Inst = &*I++; - if (Inst == CI) - continue; - if (!L->makeLoopInvariant( - Inst, AnyInvariant, - Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) { - AllInvariant = false; - break; - } - } - if (AnyInvariant) { - Changed = true; - // The loop disposition of all SCEV expressions that depend on any - // hoisted values have also changed. - if (SE) - SE->forgetLoopDispositions(L); - } - if (!AllInvariant) continue; - - // The block has now been cleared of all instructions except for - // a comparison and a conditional branch. SimplifyCFG may be able - // to fold it now. - if (!FoldBranchToCommonDest(BI, MSSAU)) - continue; - - // Success. The block is now dead, so remove it from the loop, - // update the dominator tree and delete it. 
- LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
-                        << ExitingBlock->getName() << "\n");
-
-      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
-      Changed = true;
-      LI->removeBlock(ExitingBlock);
-
-      DomTreeNode *Node = DT->getNode(ExitingBlock);
-      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
-          Node->getChildren();
-      while (!Children.empty()) {
-        DomTreeNode *Child = Children.front();
-        DT->changeImmediateDominator(Child, Node->getIDom());
-      }
-      DT->eraseNode(ExitingBlock);
-      if (MSSAU) {
-        SmallSetVector<BasicBlock *, 8> ExitBlockSet;
-        ExitBlockSet.insert(ExitingBlock);
-        MSSAU->removeBlocks(ExitBlockSet);
-      }
-
-      BI->getSuccessor(0)->removePredecessor(
-          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
-      BI->getSuccessor(1)->removePredecessor(
-          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
-      ExitingBlock->eraseFromParent();
-    }
-  }
-
-  // Changing exit conditions for blocks may affect exit counts of this loop
-  // and any of its parents, so we must invalidate the entire subtree if we've
-  // made any changes.
-  if (Changed && SE)
-    SE->forgetTopmostLoop(L);
-
-  if (MSSAU && VerifyMemorySSA)
-    MSSAU->getMemorySSA()->verifyMemorySSA();
-
-  return Changed;
-}
-
-bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
-                        ScalarEvolution *SE, AssumptionCache *AC,
-                        MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
-  bool Changed = false;
-
-#ifndef NDEBUG
-  // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
-  // form.
-  if (PreserveLCSSA) {
-    assert(DT && "DT not available.");
-    assert(LI && "LI not available.");
-    assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
-           "Requested to preserve LCSSA, but it's already broken.");
-  }
-#endif
-
-  // Worklist maintains our depth-first queue of loops in this nest to process.
-  SmallVector<Loop *, 4> Worklist;
-  Worklist.push_back(L);
-
-  // Walk the worklist from front to back, pushing newly found sub loops onto
-  // the back. This will let us process loops from back to front in depth-first
-  // order. We can use this simple process because loops form a tree.
-  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
-    Loop *L2 = Worklist[Idx];
-    Worklist.append(L2->begin(), L2->end());
-  }
-
-  while (!Worklist.empty())
-    Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
-                               AC, MSSAU, PreserveLCSSA);
-
-  return Changed;
-}
-
-namespace {
-  struct LoopSimplify : public FunctionPass {
-    static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : FunctionPass(ID) {
-      initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool runOnFunction(Function &F) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AssumptionCacheTracker>();
-
-      // We need loop information to identify the loops...
-      AU.addRequired<DominatorTreeWrapperPass>();
-      AU.addPreserved<DominatorTreeWrapperPass>();
-
-      AU.addRequired<LoopInfoWrapperPass>();
-      AU.addPreserved<LoopInfoWrapperPass>();
-
-      AU.addPreserved<BasicAAWrapperPass>();
-      AU.addPreserved<AAResultsWrapperPass>();
-      AU.addPreserved<GlobalsAAWrapperPass>();
-      AU.addPreserved<ScalarEvolutionWrapperPass>();
-      AU.addPreserved<SCEVAAWrapperPass>();
-      AU.addPreservedID(LCSSAID);
-      AU.addPreserved<DependenceAnalysisWrapperPass>();
-      AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- AU.addPreserved<BranchProbabilityInfoWrapperPass>(); - if (EnableMSSALoopDependency) - AU.addPreserved<MemorySSAWrapperPass>(); - } - - /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. - void verifyAnalysis() const override; - }; -} - -char LoopSimplify::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", false, false) - -// Publicly exposed interface to pass... -char &llvm::LoopSimplifyID = LoopSimplify::ID; -Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } - -/// runOnFunction - Run down all loops in the CFG (recursively, but we could do -/// it in any convenient order) inserting preheaders... -/// -bool LoopSimplify::runOnFunction(Function &F) { - bool Changed = false; - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); - ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; - AssumptionCache *AC = - &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - MemorySSA *MSSA = nullptr; - std::unique_ptr<MemorySSAUpdater> MSSAU; - if (EnableMSSALoopDependency) { - auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); - if (MSSAAnalysis) { - MSSA = &MSSAAnalysis->getMSSA(); - MSSAU = make_unique<MemorySSAUpdater>(MSSA); - } - } - - bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - - // Simplify each loop nest in the function. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); - -#ifndef NDEBUG - if (PreserveLCSSA) { - bool InLCSSA = all_of( - *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }); - assert(InLCSSA && "LCSSA is broken after loop-simplify."); - } -#endif - return Changed; -} - -PreservedAnalyses LoopSimplifyPass::run(Function &F, - FunctionAnalysisManager &AM) { - bool Changed = false; - LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); - DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); - ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F); - AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); - - // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA - // after simplifying the loops. MemorySSA is not preserved either. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= - simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false); - - if (!Changed) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); - PA.preserve<ScalarEvolutionAnalysis>(); - PA.preserve<DependenceAnalysis>(); - // BPI maps conditional terminators to probabilities, LoopSimplify can insert - // blocks, but it does so only by splitting existing blocks and edges. This - // results in the interesting property that all new terminators inserted are - // unconditional branches which do not appear in BPI. All deletions are - // handled via ValueHandle callbacks w/in BPI. 
- PA.preserve<BranchProbabilityAnalysis>(); - return PA; -} - -// FIXME: Restore this code when we re-enable verification in verifyAnalysis -// below. -#if 0 -static void verifyLoop(Loop *L) { - // Verify subloops. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - verifyLoop(*I); - - // It used to be possible to just assert L->isLoopSimplifyForm(), however - // with the introduction of indirectbr, there are now cases where it's - // not possible to transform a loop as necessary. We can at least check - // that there is an indirectbr near any time there's trouble. - - // Indirectbr can interfere with preheader and unique backedge insertion. - if (!L->getLoopPreheader() || !L->getLoopLatch()) { - bool HasIndBrPred = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PE = pred_end(L->getHeader()); PI != PE; ++PI) - if (isa<IndirectBrInst>((*PI)->getTerminator())) { - HasIndBrPred = true; - break; - } - assert(HasIndBrPred && - "LoopSimplify has no excuse for missing loop header info!"); - (void)HasIndBrPred; - } - - // Indirectbr can interfere with exit block canonicalization. - if (!L->hasDedicatedExits()) { - bool HasIndBrExiting = false; - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { - HasIndBrExiting = true; - break; - } - } - - assert(HasIndBrExiting && - "LoopSimplify has no excuse for missing exit block info!"); - (void)HasIndBrExiting; - } -} -#endif - -void LoopSimplify::verifyAnalysis() const { - // FIXME: This routine is being called mid-way through the loop pass manager - // as loop passes destroy this analysis. That's actually fine, but we have no - // way of expressing that here. Once all of the passes that destroy this are - // hoisted out of the loop pass manager we can add back verification here. -#if 0 - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - verifyLoop(*I); -#endif -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp deleted file mode 100644 index 4a1edb3700c0..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ /dev/null @@ -1,978 +0,0 @@ -//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements some loop unrolling utilities. It does not define any -// actual pass or policy, but provides a single function to perform loop -// unrolling. -// -// The process of unrolling can produce extraneous basic blocks linked with -// unconditional branches. This will be corrected in the future. 
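// (Illustrative example of the basic transformation: fully unrolling
// "for (i = 0; i < 4; ++i) body(i);" by its known trip count yields
// "body(0); body(1); body(2); body(3);" with the loop control removed,
// while partially unrolling by 2 yields
// "for (i = 0; i < 4; i += 2) { body(i); body(i + 1); }".)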
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unroll"
-
-// TODO: Should these be here or in LoopUnroll?
-STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
-STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
-                                 "conditional latch (completely or otherwise)");
-
-static cl::opt<bool>
-UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
-                    cl::desc("Allow runtime unrolled loops to be unrolled "
-                             "with epilog instead of prolog."));
-
-static cl::opt<bool>
-UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
-                    cl::desc("Verify domtree after unrolling"),
-#ifdef EXPENSIVE_CHECKS
-    cl::init(true)
-#else
-    cl::init(false)
-#endif
-                    );
-
-/// Convert the instruction operands from referencing the current values into
-/// those specified by VMap.
-void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
-  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
-    Value *Op = I->getOperand(op);
-
-    // Unwrap arguments of dbg.value intrinsics.
-    bool Wrapped = false;
-    if (auto *V = dyn_cast<MetadataAsValue>(Op))
-      if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) {
-        Op = Unwrapped->getValue();
-        Wrapped = true;
-      }
-
-    auto wrap = [&](Value *V) {
-      auto &C = I->getContext();
-      return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V;
-    };
-
-    ValueToValueMapTy::iterator It = VMap.find(Op);
-    if (It != VMap.end())
-      I->setOperand(op, wrap(It->second));
-  }
-
-  if (PHINode *PN = dyn_cast<PHINode>(I)) {
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
-      if (It != VMap.end())
-        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
-    }
-  }
-}
-
-/// Check if unrolling created a situation where we need to insert phi nodes to
-/// preserve LCSSA form.
-/// \param Blocks is a vector of basic blocks representing the unrolled loop.
-/// \param L is the outer loop.
-/// It's possible that some of the blocks are in L, and some are not. In this
-/// case, if there is a use outside L and the definition is inside L, we need to
-/// insert a phi-node, otherwise LCSSA will be broken.
-/// The function is just a helper function for llvm::UnrollLoop that returns
-/// true if this situation occurs, indicating that LCSSA needs to be fixed.
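// For example (hypothetical IR): if %v is defined in a block that stays in L
// but, after unrolling, is used from a cloned block now outside L, LCSSA
// requires an exit phi such as
//   %v.lcssa = phi [ %v, %exiting ]   ; in the exit block
// and the outside use must be rewritten to go through %v.lcssa.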
-static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
-                                     LoopInfo *LI) {
-  for (BasicBlock *BB : Blocks) {
-    if (LI->getLoopFor(BB) == L)
-      continue;
-    for (Instruction &I : *BB) {
-      for (Use &U : I.operands()) {
-        if (auto Def = dyn_cast<Instruction>(U)) {
-          Loop *DefLoop = LI->getLoopFor(Def->getParent());
-          if (!DefLoop)
-            continue;
-          if (DefLoop->contains(L))
-            return true;
-        }
-      }
-    }
-  }
-  return false;
-}
-
-/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
-/// and adds a mapping from the original loop to the new loop to NewLoops.
-/// Returns nullptr if no new loop was created; otherwise returns a pointer to
-/// the original loop that OriginalBB was part of.
-const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
-                                           BasicBlock *ClonedBB, LoopInfo *LI,
-                                           NewLoopsMap &NewLoops) {
-  // Figure out which loop New is in.
-  const Loop *OldLoop = LI->getLoopFor(OriginalBB);
-  assert(OldLoop && "Should (at least) be in the loop being unrolled!");
-
-  Loop *&NewLoop = NewLoops[OldLoop];
-  if (!NewLoop) {
-    // Found a new sub-loop.
-    assert(OriginalBB == OldLoop->getHeader() &&
-           "Header should be first in RPO");
-
-    NewLoop = LI->AllocateLoop();
-    Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
-
-    if (NewLoopParent)
-      NewLoopParent->addChildLoop(NewLoop);
-    else
-      LI->addTopLevelLoop(NewLoop);
-
-    NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
-    return OldLoop;
-  } else {
-    NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
-    return nullptr;
-  }
-}
-
-/// The function chooses which type of unroll (epilog or prolog) is more
-/// profitable.
-/// Epilog unroll is more profitable when there is a PHI that starts from a
-/// constant. In this case epilog unrolling will leave the PHI starting from
-/// a constant, but prolog unrolling will convert it to a non-constant.
-///
-/// loop:
-///   PN = PHI [I, Latch], [CI, PreHeader]
-///   I = foo(PN)
-///   ...
-///
-/// Epilog unroll case.
-/// loop:
-///   PN = PHI [I2, Latch], [CI, PreHeader]
-///   I1 = foo(PN)
-///   I2 = foo(I1)
-///   ...
-/// Prolog unroll case.
-///   NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
-/// loop:
-///   PN = PHI [I2, Latch], [NewPN, PreHeader]
-///   I1 = foo(PN)
-///   I2 = foo(I1)
-///   ...
-///
-static bool isEpilogProfitable(Loop *L) {
-  BasicBlock *PreHeader = L->getLoopPreheader();
-  BasicBlock *Header = L->getHeader();
-  assert(PreHeader && Header);
-  for (const PHINode &PN : Header->phis()) {
-    if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
-      return true;
-  }
-  return false;
-}
-
-/// Perform some cleanup and simplifications on loops after unrolling. It is
-/// useful to simplify the IV's in the new loop, as well as do a quick
-/// simplify/dce pass of the instructions.
-void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
-                                   ScalarEvolution *SE, DominatorTree *DT,
-                                   AssumptionCache *AC) {
-  // Simplify any new induction variables in the partially unrolled loop.
-  if (SE && SimplifyIVs) {
-    SmallVector<WeakTrackingVH, 16> DeadInsts;
-    simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
-
-    // Aggressively clean up dead instructions that simplifyLoopIVs already
-    // identified. Any remaining should be cleaned up below.
-    while (!DeadInsts.empty())
-      if (Instruction *Inst =
-              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-        RecursivelyDeleteTriviallyDeadInstructions(Inst);
-  }
-
-  // At this point, the code is well formed. We now do a quick sweep over the
-  // inserted code, doing constant propagation and dead code elimination as we
-  // go.
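// For example, in each unrolled copy the cloned exit compare often pits the
// now-known IV value for that copy against the trip count, i.e. a compare of
// two constants; SimplifyInstruction folds it below and the dead instructions
// are erased via isInstructionTriviallyDead.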
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-  for (BasicBlock *BB : L->getBlocks()) {
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
-      Instruction *Inst = &*I++;
-
-      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
-        if (LI->replacementPreservesLCSSAForm(Inst, V))
-          Inst->replaceAllUsesWith(V);
-      if (isInstructionTriviallyDead(Inst))
-        BB->getInstList().erase(Inst);
-    }
-  }
-
-  // TODO: after peeling or unrolling, previously loop variant conditions are
-  // likely to fold to constants, eagerly propagating those here will require
-  // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
-  // appropriate.
-}
-
-/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
-/// can only fail when the loop's latch block is not terminated by a conditional
-/// branch instruction. However, if the trip count (and multiple) are not known,
-/// loop unrolling will mostly produce more code that is no faster.
-///
-/// TripCount is the upper bound of the iteration on which control exits
-/// LatchBlock. Control may exit the loop prior to TripCount iterations either
-/// via an early branch in another loop block or via the LatchBlock terminator.
-/// This is relaxed from the general definition of trip count which is the
-/// number of times the loop header executes. Note that UnrollLoop assumes that
-/// the loop counter test is in LatchBlock in order to remove unnecessary
-/// instances of the test. If control can exit the loop from the LatchBlock's
-/// terminator prior to TripCount iterations, flag PreserveCondBr needs to be
-/// set.
-///
-/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
-/// needs to be preserved. It is needed when we use the trip count upper bound
-/// to fully unroll the loop. If PreserveOnlyFirst is also set then only the
-/// first conditional branch needs to be preserved.
-///
-/// Similarly, TripMultiple divides the number of times that the LatchBlock may
-/// execute without exiting the loop.
-///
-/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
-/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
-/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
-/// iterations before branching into the unrolled loop. UnrollLoop will not
-/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
-/// AllowExpensiveTripCount is false.
-///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
-/// number of iterations we want to peel off.
-///
-/// The LoopInfo Analysis that is passed will be kept consistent.
-///
-/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
-/// DominatorTree if they are non-null.
-///
-/// If RemainderLoop is non-null, it will receive the remainder loop (if
-/// required and not fully unrolled).
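The runtime-unrolling contract above is easiest to see in a source-level sketch (illustrative only; body() is an assumed helper). With Count == 4 and an unknown n, a prologue of n % 4 iterations lets the unrolled loop assume a trip count that is a multiple of 4:

    extern void body(int);

    void unrolledByFourWithProlog(int n) {
      int i = 0;
      for (int pro = n % 4; pro > 0; --pro, ++i) // prologue: RuntimeTripCount % Count
        body(i);
      for (; i < n; i += 4) {                    // unrolled loop: n / 4 trips
        body(i);
        body(i + 1);
        body(i + 2);
        body(i + 3);
      }
    }

The epilog form preferred by isEpilogProfitable instead runs the unrolled loop first and a trailing n % 4 remainder afterwards, which keeps header PHIs starting from their original constants.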
-LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, - OptimizationRemarkEmitter *ORE, - bool PreserveLCSSA, Loop **RemainderLoop) { - - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) { - LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); - return LoopUnrollResult::Unmodified; - } - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) { - LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); - return LoopUnrollResult::Unmodified; - } - - // Loops with indirectbr cannot be cloned. - if (!L->isSafeToClone()) { - LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); - return LoopUnrollResult::Unmodified; - } - - // The current loop unroll pass can unroll loops with a single latch or header - // that's a conditional branch exiting the loop. - // FIXME: The implementation can be extended to work with more complicated - // cases, e.g. loops with multiple latches. - BasicBlock *Header = L->getHeader(); - BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator()); - BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - - // FIXME: Support loops without conditional latch and multiple exiting blocks. - if (!BI || - (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() || - L->getExitingBlock() != Header))) { - LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional " - "branch in the latch or header.\n"); - return LoopUnrollResult::Unmodified; - } - - auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) { - return BI->isConditional() && BI->getSuccessor(S1) == Header && - !L->contains(BI->getSuccessor(S2)); - }; - - // If we have a conditional latch, it must exit the loop. - if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) && - !CheckLatchSuccessors(1, 0)) { - LLVM_DEBUG( - dbgs() << "Can't unroll; a conditional latch must exit the loop"); - return LoopUnrollResult::Unmodified; - } - - auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) { - return HeaderBI && HeaderBI->isConditional() && - L->contains(HeaderBI->getSuccessor(S1)) && - !L->contains(HeaderBI->getSuccessor(S2)); - }; - - // If we do not have a conditional latch, the header must exit the loop. - if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() && - !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) { - LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop"); - return LoopUnrollResult::Unmodified; - } - - if (Header->hasAddressTaken()) { - // The loop-rotate pass can be helpful to avoid this in many cases. - LLVM_DEBUG( - dbgs() << " Won't unroll loop: address of header block is taken.\n"); - return LoopUnrollResult::Unmodified; - } - - if (ULO.TripCount != 0) - LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n"); - if (ULO.TripMultiple != 1) - LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); - - // Effectively "DCE" unrolled iterations that are beyond the tripcount - // and will never be executed. - if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount) - ULO.Count = ULO.TripCount; - - // Don't enter the unroll code if there is nothing to do. 
- if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { - LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); - return LoopUnrollResult::Unmodified; - } - - assert(ULO.Count > 0); - assert(ULO.TripMultiple > 0); - assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); - - // Are we eliminating the loop control altogether? - bool CompletelyUnroll = ULO.Count == ULO.TripCount; - SmallVector<BasicBlock *, 4> ExitBlocks; - L->getExitBlocks(ExitBlocks); - std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); - - // Go through all exits of L and see if there are any phi-nodes there. We just - // conservatively assume that they're inserted to preserve LCSSA form, which - // means that complete unrolling might break this form. We need to either fix - // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For - // now we just recompute LCSSA for the outer loop, but it should be possible - // to fix it in-place. - bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && - any_of(ExitBlocks, [](const BasicBlock *BB) { - return isa<PHINode>(BB->begin()); - }); - - // We assume a run-time trip count if the compiler cannot - // figure out the loop trip count and the unroll-runtime - // flag is specified. - bool RuntimeTripCount = - (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - - assert((!RuntimeTripCount || !ULO.PeelCount) && - "Did not expect runtime trip-count unrolling " - "and peeling for the same loop"); - - bool Peeled = false; - if (ULO.PeelCount) { - Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA); - - // Successful peeling may result in a change in the loop preheader/trip - // counts. If we later unroll the loop, we want these to be updated. - if (Peeled) { - // According to our guards and profitability checks the only - // meaningful exit should be latch block. Other exits go to deopt, - // so we do not worry about them. - BasicBlock *ExitingBlock = L->getLoopLatch(); - assert(ExitingBlock && "Loop without exiting block?"); - assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - Preheader = L->getLoopPreheader(); - ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - } - - // Loops containing convergent instructions must have a count that divides - // their TripMultiple. - LLVM_DEBUG( - { - bool HasConvergent = false; - for (auto &BB : L->blocks()) - for (auto &I : *BB) - if (auto CS = CallSite(&I)) - HasConvergent |= CS.isConvergent(); - assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) && - "Unroll count must divide trip multiple if loop contains a " - "convergent operation."); - }); - - bool EpilogProfitability = - UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog - : isEpilogProfitable(L); - - if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 && - !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, - EpilogProfitability, ULO.UnrollRemainder, - ULO.ForgetAllSCEV, LI, SE, DT, AC, - PreserveLCSSA, RemainderLoop)) { - if (ULO.Force) - RuntimeTripCount = false; - else { - LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " - "generated when assuming runtime trip count\n"); - return LoopUnrollResult::Unmodified; - } - } - - // If we know the trip count, we know the multiple... 
- unsigned BreakoutTrip = 0; - if (ULO.TripCount != 0) { - BreakoutTrip = ULO.TripCount % ULO.Count; - ULO.TripMultiple = 0; - } else { - // Figure out what multiple to use. - BreakoutTrip = ULO.TripMultiple = - (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple); - } - - using namespace ore; - // Report the unrolling decision. - if (CompletelyUnroll) { - LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << ULO.TripCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), - L->getHeader()) - << "completely unrolled loop with " - << NV("UnrollCount", ULO.TripCount) << " iterations"; - }); - } else if (ULO.PeelCount) { - LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << ULO.PeelCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", ULO.PeelCount) - << " iterations"; - }); - } else { - auto DiagBuilder = [&]() { - OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), - L->getHeader()); - return Diag << "unrolled loop by a factor of " - << NV("UnrollCount", ULO.Count); - }; - - LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " - << ULO.Count); - if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) { - LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() << " with a breakout at trip " - << NV("BreakoutTrip", BreakoutTrip); - }); - } else if (ULO.TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch"); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() - << " with " << NV("TripMultiple", ULO.TripMultiple) - << " trips per branch"; - }); - } else if (RuntimeTripCount) { - LLVM_DEBUG(dbgs() << " with run-time trip count"); - if (ORE) - ORE->emit( - [&]() { return DiagBuilder() << " with run-time trip count"; }); - } - LLVM_DEBUG(dbgs() << "!\n"); - } - - // We are going to make changes to this loop. SCEV may be keeping cached info - // about it, in particular about backedge taken count. The changes we make - // are guaranteed to invalidate this information for our loop. It is tempting - // to only invalidate the loop being unrolled, but it is incorrect as long as - // all exiting branches from all inner loops have impact on the outer loops, - // and if something changes inside them then any of outer loops may also - // change. When we forget outermost loop, we also forget all contained loops - // and this is what we need here. - if (SE) { - if (ULO.ForgetAllSCEV) - SE->forgetAllLoops(); - else - SE->forgetTopmostLoop(L); - } - - bool ContinueOnTrue; - bool LatchIsExiting = BI->isConditional(); - BasicBlock *LoopExit = nullptr; - if (LatchIsExiting) { - ContinueOnTrue = L->contains(BI->getSuccessor(0)); - LoopExit = BI->getSuccessor(ContinueOnTrue); - } else { - NumUnrolledWithHeader++; - ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0)); - LoopExit = HeaderBI->getSuccessor(ContinueOnTrue); - } - - // For the first iteration of the loop, we should use the precloned values for - // PHI nodes. Insert associations now. 
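The BreakoutTrip/TripMultiple arithmetic above is self-contained and can be sanity-checked outside LLVM. A minimal plain C++17 sketch (the struct and function names here are mine, not the pass's):

#include <numeric>

struct BreakoutInfo {
  unsigned BreakoutTrip;  // iteration within the unrolled body that may exit
  unsigned TripMultiple;  // conservative multiple of the trip count, or 0
};

BreakoutInfo computeBreakout(unsigned Count, unsigned TripCount,
                             unsigned TripMultiple) {
  if (TripCount != 0)
    return {TripCount % Count, 0};  // exact breakout point is known
  // Only a multiple is known: gcd(Count, TripMultiple) unrolled copies can
  // still be taken before a conditional branch is required again.
  unsigned G = std::gcd(Count, TripMultiple);
  return {G, G};
}

For example, Count = 4 with TripMultiple = 6 gives a gcd of 2, which is the "2 trips per branch" case the remark code above reports. The cloning loop below then builds the per-iteration value maps, starting from the precloned PHI values the comment above mentions.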
- ValueToValueMapTy LastValueMap; - std::vector<PHINode*> OrigPHINode; - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - OrigPHINode.push_back(cast<PHINode>(I)); - } - - std::vector<BasicBlock *> Headers; - std::vector<BasicBlock *> HeaderSucc; - std::vector<BasicBlock *> Latches; - Headers.push_back(Header); - Latches.push_back(LatchBlock); - - if (!LatchIsExiting) { - auto *Term = cast<BranchInst>(Header->getTerminator()); - if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) { - assert(L->contains(Term->getSuccessor(0))); - HeaderSucc.push_back(Term->getSuccessor(0)); - } else { - assert(L->contains(Term->getSuccessor(1))); - HeaderSucc.push_back(Term->getSuccessor(1)); - } - } - - // The current on-the-fly SSA update requires blocks to be processed in - // reverse postorder so that LastValueMap contains the correct value at each - // exit. - LoopBlocksDFS DFS(L); - DFS.perform(LI); - - // Stash the DFS iterators before adding blocks to the loop. - LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); - LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); - - std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); - - // Loop Unrolling might create new loops. While we do preserve LoopInfo, we - // might break loop-simplified form for these loops (as they, e.g., would - // share the same exit blocks). We'll keep track of loops for which we can - // break this so that later we can re-simplify them. - SmallSetVector<Loop *, 4> LoopsToSimplify; - for (Loop *SubLoop : *L) - LoopsToSimplify.insert(SubLoop); - - if (Header->getParent()->isDebugInfoForProfiling()) - for (BasicBlock *BB : L->getBlocks()) - for (Instruction &I : *BB) - if (!isa<DbgInfoIntrinsic>(&I)) - if (const DILocation *DIL = I.getDebugLoc()) { - auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); - if (NewDIL) - I.setDebugLoc(NewDIL.getValue()); - else - LLVM_DEBUG(dbgs() - << "Failed to create new discriminator: " - << DIL->getFilename() << " Line: " << DIL->getLine()); - } - - for (unsigned It = 1; It != ULO.Count; ++It) { - std::vector<BasicBlock*> NewBlocks; - SmallDenseMap<const Loop *, Loop *, 4> NewLoops; - NewLoops[L] = L; - - for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - ValueToValueMapTy VMap; - BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); - Header->getParent()->getBasicBlockList().push_back(New); - - assert((*BB != Header || LI->getLoopFor(*BB) == L) && - "Header should not be in a sub-loop"); - // Tell LI about New. - const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); - if (OldLoop) - LoopsToSimplify.insert(NewLoops[OldLoop]); - - if (*BB == Header) - // Loop over all of the PHI nodes in the block, changing them to use - // the incoming values from the previous block. - for (PHINode *OrigPHI : OrigPHINode) { - PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]); - Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); - if (Instruction *InValI = dyn_cast<Instruction>(InVal)) - if (It > 1 && L->contains(InValI)) - InVal = LastValueMap[InValI]; - VMap[OrigPHI] = InVal; - New->getInstList().erase(NewPHI); - } - - // Update our running map of newest clones - LastValueMap[*BB] = New; - for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); - VI != VE; ++VI) - LastValueMap[VI->first] = VI->second; - - // Add phi entries for newly created values to all exit blocks. 
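The header-PHI rewiring performed by the cloning loop above is easiest to see at source level. A hedged illustration (hand-written C++, not compiler output), assuming the trip count is a multiple of 2:

void original(int *A, int N) {
  for (int I = 0; I < N; ++I)   // I is the header PHI
    A[I] = I;
}

// After unrolling by Count == 2: the clone's PHI is deleted and its uses
// are rewritten to the previous iteration's latch value (I + 1), which is
// what the VMap/LastValueMap substitution above performs on real IR.
void unrolledBy2(int *A, int N) {  // assumes N % 2 == 0
  for (int I = 0; I < N; I += 2) {
    A[I] = I;                      // original body
    A[I + 1] = I + 1;              // clone ".1" with remapped operands
  }
}

The loop below completes the picture by patching PHIs in exit blocks, so that values escaping the loop refer to the correct per-iteration copy.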
- for (BasicBlock *Succ : successors(*BB)) { - if (L->contains(Succ)) - continue; - for (PHINode &PHI : Succ->phis()) { - Value *Incoming = PHI.getIncomingValueForBlock(*BB); - ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); - if (It != LastValueMap.end()) - Incoming = It->second; - PHI.addIncoming(Incoming, New); - } - } - // Keep track of new headers and latches as we create them, so that - // we can insert the proper branches later. - if (*BB == Header) - Headers.push_back(New); - if (*BB == LatchBlock) - Latches.push_back(New); - - // Keep track of the successor of the new header in the current iteration. - for (auto *Pred : predecessors(*BB)) - if (Pred == Header) { - HeaderSucc.push_back(New); - break; - } - - NewBlocks.push_back(New); - UnrolledLoopBlocks.push_back(New); - - // Update DomTree: since we just copy the loop body, and each copy has a - // dedicated entry block (copy of the header block), this header's copy - // dominates all copied blocks. That means, dominance relations in the - // copied body are the same as in the original body. - if (DT) { - if (*BB == Header) - DT->addNewBlock(New, Latches[It - 1]); - else { - auto BBDomNode = DT->getNode(*BB); - auto BBIDom = BBDomNode->getIDom(); - BasicBlock *OriginalBBIDom = BBIDom->getBlock(); - DT->addNewBlock( - New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); - } - } - } - - // Remap all instructions in the most recent iteration - for (BasicBlock *NewBlock : NewBlocks) { - for (Instruction &I : *NewBlock) { - ::remapInstruction(&I, LastValueMap); - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - } - } - } - - // Loop over the PHI nodes in the original block, setting incoming values. - for (PHINode *PN : OrigPHINode) { - if (CompletelyUnroll) { - PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); - Header->getInstList().erase(PN); - } else if (ULO.Count > 1) { - Value *InVal = PN->removeIncomingValue(LatchBlock, false); - // If this value was defined in the loop, take the value defined by the - // last iteration of the loop. - if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { - if (L->contains(InValI)) - InVal = LastValueMap[InVal]; - } - assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); - PN->addIncoming(InVal, Latches.back()); - } - } - - auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, - ArrayRef<BasicBlock *> NextBlocks, - BasicBlock *BlockInLoop, - bool NeedConditional) { - auto *Term = cast<BranchInst>(Src->getTerminator()); - if (NeedConditional) { - // Update the conditional branch's successor for the following - // iteration. - Term->setSuccessor(!ContinueOnTrue, Dest); - } else { - // Remove phi operands at this loop exit - if (Dest != LoopExit) { - BasicBlock *BB = Src; - for (BasicBlock *Succ : successors(BB)) { - // Preserve the incoming value from BB if we are jumping to the block - // in the current loop. - if (Succ == BlockInLoop) - continue; - for (PHINode &Phi : Succ->phis()) - Phi.removeIncomingValue(BB, false); - } - } - // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest, Term); - Term->eraseFromParent(); - } - }; - - // Now that all the basic blocks for the unrolled iterations are in place, - // set up the branches to connect them. 
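What the branch wiring below produces, seen at source level for the latch-exiting case with Count == 4 and a trip count known to be a multiple of 4 (an illustrative sketch, not generated code): only the last copy of the latch keeps its conditional test.

void unrolledBy4(int *A, int N) {  // assumes N > 0 and N % 4 == 0
  int I = 0;
Loop:
  A[I] = I;          // original iteration: latch branch dropped
  A[I + 1] = I + 1;  // clone .1: unconditional fall-through
  A[I + 2] = I + 2;  // clone .2: unconditional fall-through
  A[I + 3] = I + 3;  // clone .3: the only latch that keeps its test
  I += 4;
  if (I < N)
    goto Loop;
}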
- if (LatchIsExiting) { - // Set up latches to branch to the new header in the unrolled iterations or - // the loop exit for the last latch in a fully unrolled loop. - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) { - NeedConditional = false; - } - - // For a complete unroll, make the last iteration end with a branch - // to the exit block. - if (CompletelyUnroll) { - if (j == 0) - Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to keep - // the conditional branch except the last one because the loop may exit - // after any iteration. - assert(NeedConditional && - "NeedCondition cannot be modified by both complete " - "unrolling and runtime unrolling"); - NeedConditional = - (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0)); - } else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) { - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - } - - setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional); - } - } else { - // Setup headers to branch to their new successors in the unrolled - // iterations. - for (unsigned i = 0, e = Headers.size(); i != e; ++i) { - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = HeaderSucc[i]; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) - NeedConditional = false; - - if (CompletelyUnroll) - // We cannot drop the conditional branch for the last condition, as we - // may have to execute the loop body depending on the condition. - NeedConditional = j == 0 || ULO.PreserveCondBr; - else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - - setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional); - } - - // Set up latches to branch to the new header in the unrolled iterations or - // the loop exit for the last latch in a fully unrolled loop. - - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The original branch was replicated in each unrolled iteration. - BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); - - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - - // When completely unrolling, the last latch becomes unreachable. - if (CompletelyUnroll && j == 0) - new UnreachableInst(Term->getContext(), Term); - else - // Replace the conditional branch with an unconditional one. - BranchInst::Create(Dest, Term); - - Term->eraseFromParent(); - } - } - - // Update dominators of blocks we might reach through exits. - // Immediate dominator of such block might change, because we add more - // routes which can lead to the exit: we can now reach it from the copied - // iterations too. - if (DT && ULO.Count > 1) { - for (auto *BB : OriginalLoopBlocks) { - auto *BBDomNode = DT->getNode(BB); - SmallVector<BasicBlock *, 16> ChildrenToUpdate; - for (auto *ChildDomNode : BBDomNode->getChildren()) { - auto *ChildBB = ChildDomNode->getBlock(); - if (!L->contains(ChildBB)) - ChildrenToUpdate.push_back(ChildBB); - } - BasicBlock *NewIDom; - BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header; - auto &TermBlocks = LatchIsExiting ? 
Latches : Headers; - if (BB == TermBlock) { - // The latch is special because we emit unconditional branches in - // some cases where the original loop contained a conditional branch. - // Since the latch is always at the bottom of the loop, if the latch - // dominated an exit before unrolling, the new dominator of that exit - // must also be a latch. Specifically, the dominator is the first - // latch which ends in a conditional branch, or the last latch if - // there is no such latch. - // For loops exiting from the header, we limit the supported loops - // to have a single exiting block. - NewIDom = TermBlocks.back(); - for (BasicBlock *Iter : TermBlocks) { - Instruction *Term = Iter->getTerminator(); - if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) { - NewIDom = Iter; - break; - } - } - } else { - // The new idom of the block will be the nearest common dominator - // of all copies of the previous idom. This is equivalent to the - // nearest common dominator of the previous idom and the first latch, - // which dominates all copies of the previous idom. - NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); - } - for (auto *ChildBB : ChildrenToUpdate) - DT->changeImmediateDominator(ChildBB, NewIDom); - } - } - - assert(!DT || !UnrollVerifyDomtree || - DT->verify(DominatorTree::VerificationLevel::Fast)); - - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - // Merge adjacent basic blocks, if possible. - for (BasicBlock *Latch : Latches) { - BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); - assert((Term || - (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && - "Need a branch as terminator, except when fully unrolling with " - "unconditional latch"); - if (Term && Term->isUnconditional()) { - BasicBlock *Dest = Term->getSuccessor(0); - BasicBlock *Fold = Dest->getUniquePredecessor(); - if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { - // Dest has been folded into Fold. Update our worklists accordingly. - std::replace(Latches.begin(), Latches.end(), Dest, Fold); - UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), - UnrolledLoopBlocks.end(), Dest), - UnrolledLoopBlocks.end()); - } - } - } - - // At this point, the code is well formed. We now simplify the unrolled loop, - // doing constant propagation and dead code elimination as we go. - simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC); - - NumCompletelyUnrolled += CompletelyUnroll; - ++NumUnrolled; - - Loop *OuterL = L->getParentLoop(); - // Update LoopInfo if the loop is completely removed. - if (CompletelyUnroll) - LI->erase(L); - - // After complete unrolling most of the blocks should be contained in OuterL. - // However, some of them might happen to be out of OuterL (e.g. if they - // precede a loop exit). In this case we might need to insert PHI nodes in - // order to preserve LCSSA form. - // We don't need to check this if we already know that we need to fix LCSSA - // form. - // TODO: For now we just recompute LCSSA for the outer loop in this case, but - // it should be possible to fix it in-place. - if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) - NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); - - // If we have a pass and a DominatorTree we should re-simplify impacted loops - // to ensure subsequent analyses can rely on this form. 
We want to simplify - // at least one layer outside of the loop that was unrolled so that any - // changes to the parent loop exposed by the unrolling are considered. - if (DT) { - if (OuterL) { - // OuterL includes all loops for which we can break loop-simplify, so - // it's sufficient to simplify only it (it'll recursively simplify inner - // loops too). - if (NeedToFixLCSSA) { - // LCSSA must be performed on the outermost affected loop. The unrolled - // loop's last loop latch is guaranteed to be in the outermost loop - // after LoopInfo's been updated by LoopInfo::erase. - Loop *LatchLoop = LI->getLoopFor(Latches.back()); - Loop *FixLCSSALoop = OuterL; - if (!FixLCSSALoop->contains(LatchLoop)) - while (FixLCSSALoop->getParentLoop() != LatchLoop) - FixLCSSALoop = FixLCSSALoop->getParentLoop(); - - formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE); - } else if (PreserveLCSSA) { - assert(OuterL->isLCSSAForm(*DT) && - "Loops should be in LCSSA form after loop-unroll."); - } - - // TODO: That potentially might be compile-time expensive. We should try - // to fix the loop-simplified form incrementally. - simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); - } else { - // Simplify loops for which we might've broken loop-simplify form. - for (Loop *SubLoop : LoopsToSimplify) - simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); - } - } - - return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled - : LoopUnrollResult::PartiallyUnrolled; -} - -/// Given an llvm.loop loop id metadata node, returns the loop hint metadata -/// node with the given name (for example, "llvm.loop.unroll.count"). If no -/// such metadata node exists, then nullptr is returned. -MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { - // First operand should refer to the loop id itself. - assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); - assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); - - for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (!MD) - continue; - - MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - if (!S) - continue; - - if (Name.equals(S->getString())) - return MD; - } - return nullptr; -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp deleted file mode 100644 index ff49d83f25c5..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ /dev/null @@ -1,820 +0,0 @@ -//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements loop unroll and jam as a routine, much like -// LoopUnroll.cpp implements loop unroll. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/Utils/Local.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -using namespace llvm; - -#define DEBUG_TYPE "loop-unroll-and-jam" - -STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed"); -STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed"); - -typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet; - -// Partition blocks in an outer/inner loop pair into blocks before and after -// the loop -static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop, - BasicBlockSet &ForeBlocks, - BasicBlockSet &SubLoopBlocks, - BasicBlockSet &AftBlocks, - DominatorTree *DT) { - BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); - SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end()); - - for (BasicBlock *BB : L->blocks()) { - if (!SubLoop->contains(BB)) { - if (DT->dominates(SubLoopLatch, BB)) - AftBlocks.insert(BB); - else - ForeBlocks.insert(BB); - } - } - - // Check that all blocks in ForeBlocks together dominate the subloop - // TODO: This might ideally be done better with a dominator/postdominators. - BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader(); - for (BasicBlock *BB : ForeBlocks) { - if (BB == SubLoopPreHeader) - continue; - Instruction *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (!ForeBlocks.count(TI->getSuccessor(i))) - return false; - } - - return true; -} - -// Looks at the phi nodes in Header for values coming from Latch. For these -// instructions and all their operands calls Visit on them, keeping going for -// all the operands in AftBlocks. Returns false if Visit returns false, -// otherwise returns true. This is used to process the instructions in the -// Aft blocks that need to be moved before the subloop. It is used in two -// places. One to check that the required set of instructions can be moved -// before the loop. Then to collect the instructions to actually move in -// moveHeaderPhiOperandsToForeBlocks. 
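The walk described above is a plain worklist algorithm. A standalone C++ model (with a hypothetical Inst type in place of llvm::Instruction) may help before reading the template that follows:

#include <vector>

struct Inst {
  std::vector<Inst *> Operands;
  bool InAft;  // whether the instruction lives in an Aft block
};

// Start from the header PHIs' latch inputs; visit each instruction, and
// keep walking operands only while we remain inside the Aft blocks.
template <typename VisitFn>
bool walkLatchInputs(std::vector<Inst *> Worklist, VisitFn Visit) {
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!Visit(I))
      return false;
    if (I->InAft)
      for (Inst *Op : I->Operands)
        Worklist.push_back(Op);
  }
  return true;
}

The real template below performs the same walk over IR, seeded from Header's PHI inputs along the Latch edge.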
-template <typename T>
-static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
-                                     BasicBlockSet &AftBlocks, T Visit) {
-  SmallVector<Instruction *, 8> Worklist;
-  for (auto &Phi : Header->phis()) {
-    Value *V = Phi.getIncomingValueForBlock(Latch);
-    if (Instruction *I = dyn_cast<Instruction>(V))
-      Worklist.push_back(I);
-  }
-
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.back();
-    Worklist.pop_back();
-    if (!Visit(I))
-      return false;
-
-    if (AftBlocks.count(I->getParent()))
-      for (auto &U : I->operands())
-        if (Instruction *II = dyn_cast<Instruction>(U))
-          Worklist.push_back(II);
-  }
-
-  return true;
-}
-
-// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
-static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
-                                              BasicBlock *Latch,
-                                              Instruction *InsertLoc,
-                                              BasicBlockSet &AftBlocks) {
-  // We need to ensure we move the instructions in the correct order,
-  // starting with the earliest required instruction and moving forward.
-  std::vector<Instruction *> Visited;
-  processHeaderPhiOperands(Header, Latch, AftBlocks,
-                           [&Visited, &AftBlocks](Instruction *I) {
-                             if (AftBlocks.count(I->getParent()))
-                               Visited.push_back(I);
-                             return true;
-                           });
-
-  // Move all instructions in program order to before the InsertLoc
-  BasicBlock *InsertLocBB = InsertLoc->getParent();
-  for (Instruction *I : reverse(Visited)) {
-    if (I->getParent() != InsertLocBB)
-      I->moveBefore(InsertLoc);
-  }
-}
-
-/*
-  This method performs Unroll and Jam. For a simple loop like:
-  for (i = ..)
-    Fore(i)
-    for (j = ..)
-      SubLoop(i, j)
-    Aft(i)
-
-  Instead of doing normal inner or outer unrolling, we do:
-  for (i = .., i+=2)
-    Fore(i)
-    Fore(i+1)
-    for (j = ..)
-      SubLoop(i, j)
-      SubLoop(i+1, j)
-    Aft(i)
-    Aft(i+1)
-
-  So the outer loop is essentially unrolled and then the inner loops are fused
-  ("jammed") together into a single loop. This can increase speed when there
-  are loads in SubLoop that are invariant to i, as they become shared between
-  the now jammed inner loops.
-
-  We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
-  Fore blocks are those before the inner loop, Aft are those after. Normal
-  Unroll code is used to copy each of these sets of blocks and the results are
-  combined together into the final form above.
-
-  isSafeToUnrollAndJam should be used prior to calling this to make sure the
-  unrolling will be valid. Checking profitability is also advisable.
-
-  If EpilogueLoop is non-null, it receives the epilogue loop (if it was
-  necessary to create one and not fully unrolled).
-*/
-LoopUnrollResult llvm::UnrollAndJamLoop(
-    Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
-    bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
-    AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
-
-  // When we enter here we should have already checked that it is safe.
-  BasicBlock *Header = L->getHeader();
-  assert(L->getSubLoops().size() == 1);
-  Loop *SubLoop = *L->begin();
-
-  // Don't enter the unroll code if there is nothing to do.
-  if (TripCount == 0 && Count < 2) {
-    LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
-    return LoopUnrollResult::Unmodified;
-  }
-
-  assert(Count > 0);
-  assert(TripMultiple > 0);
-  assert(TripCount == 0 || TripCount % TripMultiple == 0);
-
-  // Are we eliminating the loop control altogether?
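The block comment above describes the transformation abstractly; here is a concrete, hand-written C++ instance for Count == 2 (illustrative only, and assuming the outer trip count is even), where the X[J] loads are invariant in I and become shared once the inner loops are jammed:

void before(const float *A, const float *X, float *Out, int N, int M) {
  for (int I = 0; I < N; ++I) {
    float S = 0;                        // Fore(I)
    for (int J = 0; J < M; ++J)
      S += A[I * M + J] * X[J];         // SubLoop(I, J)
    Out[I] = S;                         // Aft(I)
  }
}

void unrollAndJamBy2(const float *A, const float *X, float *Out, int N,
                     int M) {           // assumes N % 2 == 0
  for (int I = 0; I < N; I += 2) {
    float S0 = 0, S1 = 0;               // Fore(I), Fore(I+1)
    for (int J = 0; J < M; ++J) {       // the two inner loops, jammed
      S0 += A[I * M + J] * X[J];        // X[J] is now loaded once...
      S1 += A[(I + 1) * M + J] * X[J];  // ...and reused here
    }
    Out[I] = S0;                        // Aft(I)
    Out[I + 1] = S1;                    // Aft(I+1)
  }
}

Returning to the routine: the CompletelyUnroll flag computed next answers the question posed by the comment above.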
- bool CompletelyUnroll = (Count == TripCount); - - // We use the runtime remainder in cases where we don't know trip multiple - if (TripMultiple == 1 || TripMultiple % Count != 0) { - if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, - /*UseEpilogRemainder*/ true, - UnrollRemainder, /*ForgetAllSCEV*/ false, - LI, SE, DT, AC, true, EpilogueLoop)) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " - "generated when assuming runtime trip count\n"); - return LoopUnrollResult::Unmodified; - } - } - - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. - if (SE) { - SE->forgetLoop(L); - SE->forgetLoop(SubLoop); - } - - using namespace ore; - // Report the unrolling decision. - if (CompletelyUnroll) { - LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %" - << Header->getName() << " with trip count " << TripCount - << "!\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), - L->getHeader()) - << "completely unroll and jammed loop with " - << NV("UnrollCount", TripCount) << " iterations"); - } else { - auto DiagBuilder = [&]() { - OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), - L->getHeader()); - return Diag << "unroll and jammed loop by a factor of " - << NV("UnrollCount", Count); - }; - - LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName() - << " by " << Count); - if (TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); - ORE->emit([&]() { - return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) - << " trips per branch"; - }); - } else { - LLVM_DEBUG(dbgs() << " with run-time trip count"); - ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; }); - } - LLVM_DEBUG(dbgs() << "!\n"); - } - - BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - assert(Preheader && LatchBlock && Header); - assert(BI && !BI->isUnconditional()); - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); - BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); - bool SubLoopContinueOnTrue = SubLoop->contains( - SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0)); - - // Partition blocks in an outer/inner loop pair into blocks before and after - // the loop - BasicBlockSet SubLoopBlocks; - BasicBlockSet ForeBlocks; - BasicBlockSet AftBlocks; - partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks, - DT); - - // We keep track of the entering/first and exiting/last block of each of - // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of - // blocks easier. - std::vector<BasicBlock *> ForeBlocksFirst; - std::vector<BasicBlock *> ForeBlocksLast; - std::vector<BasicBlock *> SubLoopBlocksFirst; - std::vector<BasicBlock *> SubLoopBlocksLast; - std::vector<BasicBlock *> AftBlocksFirst; - std::vector<BasicBlock *> AftBlocksLast; - ForeBlocksFirst.push_back(Header); - ForeBlocksLast.push_back(SubLoop->getLoopPreheader()); - SubLoopBlocksFirst.push_back(SubLoop->getHeader()); - SubLoopBlocksLast.push_back(SubLoop->getExitingBlock()); - AftBlocksFirst.push_back(SubLoop->getExitBlock()); - AftBlocksLast.push_back(L->getExitingBlock()); - // Maps Blocks[0] -> Blocks[It] - ValueToValueMapTy LastValueMap; - - // Move any instructions from fore phi operands from AftBlocks into Fore. 
- moveHeaderPhiOperandsToForeBlocks( - Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(), - AftBlocks); - - // The current on-the-fly SSA update requires blocks to be processed in - // reverse postorder so that LastValueMap contains the correct value at each - // exit. - LoopBlocksDFS DFS(L); - DFS.perform(LI); - // Stash the DFS iterators before adding blocks to the loop. - LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); - LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); - - if (Header->getParent()->isDebugInfoForProfiling()) - for (BasicBlock *BB : L->getBlocks()) - for (Instruction &I : *BB) - if (!isa<DbgInfoIntrinsic>(&I)) - if (const DILocation *DIL = I.getDebugLoc()) { - auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count); - if (NewDIL) - I.setDebugLoc(NewDIL.getValue()); - else - LLVM_DEBUG(dbgs() - << "Failed to create new discriminator: " - << DIL->getFilename() << " Line: " << DIL->getLine()); - } - - // Copy all blocks - for (unsigned It = 1; It != Count; ++It) { - std::vector<BasicBlock *> NewBlocks; - // Maps Blocks[It] -> Blocks[It-1] - DenseMap<Value *, Value *> PrevItValueMap; - - for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - ValueToValueMapTy VMap; - BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); - Header->getParent()->getBasicBlockList().push_back(New); - - if (ForeBlocks.count(*BB)) { - L->addBasicBlockToLoop(New, *LI); - - if (*BB == ForeBlocksFirst[0]) - ForeBlocksFirst.push_back(New); - if (*BB == ForeBlocksLast[0]) - ForeBlocksLast.push_back(New); - } else if (SubLoopBlocks.count(*BB)) { - SubLoop->addBasicBlockToLoop(New, *LI); - - if (*BB == SubLoopBlocksFirst[0]) - SubLoopBlocksFirst.push_back(New); - if (*BB == SubLoopBlocksLast[0]) - SubLoopBlocksLast.push_back(New); - } else if (AftBlocks.count(*BB)) { - L->addBasicBlockToLoop(New, *LI); - - if (*BB == AftBlocksFirst[0]) - AftBlocksFirst.push_back(New); - if (*BB == AftBlocksLast[0]) - AftBlocksLast.push_back(New); - } else { - llvm_unreachable("BB being cloned should be in Fore/Sub/Aft"); - } - - // Update our running maps of newest clones - PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]); - LastValueMap[*BB] = New; - for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); - VI != VE; ++VI) { - PrevItValueMap[VI->second] = - const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]); - LastValueMap[VI->first] = VI->second; - } - - NewBlocks.push_back(New); - - // Update DomTree: - if (*BB == ForeBlocksFirst[0]) - DT->addNewBlock(New, ForeBlocksLast[It - 1]); - else if (*BB == SubLoopBlocksFirst[0]) - DT->addNewBlock(New, SubLoopBlocksLast[It - 1]); - else if (*BB == AftBlocksFirst[0]) - DT->addNewBlock(New, AftBlocksLast[It - 1]); - else { - // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree - // structure. 
- auto BBDomNode = DT->getNode(*BB); - auto BBIDom = BBDomNode->getIDom(); - BasicBlock *OriginalBBIDom = BBIDom->getBlock(); - assert(OriginalBBIDom); - assert(LastValueMap[cast<Value>(OriginalBBIDom)]); - DT->addNewBlock( - New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); - } - } - - // Remap all instructions in the most recent iteration - for (BasicBlock *NewBlock : NewBlocks) { - for (Instruction &I : *NewBlock) { - ::remapInstruction(&I, LastValueMap); - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - } - } - - // Alter the ForeBlocks phi's, pointing them at the latest version of the - // value from the previous iteration's phis - for (PHINode &Phi : ForeBlocksFirst[It]->phis()) { - Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]); - assert(OldValue && "should have incoming edge from Aft[It]"); - Value *NewValue = OldValue; - if (Value *PrevValue = PrevItValueMap[OldValue]) - NewValue = PrevValue; - - assert(Phi.getNumOperands() == 2); - Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]); - Phi.setIncomingValue(0, NewValue); - Phi.removeIncomingValue(1); - } - } - - // Now that all the basic blocks for the unrolled iterations are in place, - // finish up connecting the blocks and phi nodes. At this point LastValueMap - // is the last unrolled iterations values. - - // Update Phis in BB from OldBB to point to NewBB - auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB, - BasicBlock *NewBB) { - for (PHINode &Phi : BB->phis()) { - int I = Phi.getBasicBlockIndex(OldBB); - Phi.setIncomingBlock(I, NewBB); - } - }; - // Update Phis in BB from OldBB to point to NewBB and use the latest value - // from LastValueMap - auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, - BasicBlock *NewBB, - ValueToValueMapTy &LastValueMap) { - for (PHINode &Phi : BB->phis()) { - for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) { - if (Phi.getIncomingBlock(b) == OldBB) { - Value *OldValue = Phi.getIncomingValue(b); - if (Value *LastValue = LastValueMap[OldValue]) - Phi.setIncomingValue(b, LastValue); - Phi.setIncomingBlock(b, NewBB); - break; - } - } - } - }; - // Move all the phis from Src into Dest - auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) { - Instruction *insertPoint = Dest->getFirstNonPHI(); - while (PHINode *Phi = dyn_cast<PHINode>(Src->begin())) - Phi->moveBefore(insertPoint); - }; - - // Update the PHI values outside the loop to point to the last block - updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(), - LastValueMap); - - // Update ForeBlocks successors and phi nodes - BranchInst *ForeTerm = - cast<BranchInst>(ForeBlocksLast.back()->getTerminator()); - BasicBlock *Dest = SubLoopBlocksFirst[0]; - ForeTerm->setSuccessor(0, Dest); - - if (CompletelyUnroll) { - while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) { - Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader)); - Phi->getParent()->getInstList().erase(Phi); - } - } else { - // Update the PHI values to point to the last aft block - updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0], - AftBlocksLast.back(), LastValueMap); - } - - for (unsigned It = 1; It != Count; It++) { - // Remap ForeBlock successors from previous iteration to this - BranchInst *ForeTerm = - cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator()); - BasicBlock *Dest = ForeBlocksFirst[It]; - ForeTerm->setSuccessor(0, Dest); - } - - // Subloop successors and 
phis - BranchInst *SubTerm = - cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); - SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); - SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); - updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0], - ForeBlocksLast.back()); - updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); - - for (unsigned It = 1; It != Count; It++) { - // Replace the conditional branch of the previous iteration subloop with an - // unconditional one to this one - BranchInst *SubTerm = - cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator()); - BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); - SubTerm->eraseFromParent(); - - updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It], - ForeBlocksLast.back()); - updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); - movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); - } - - // Aft blocks successors and phis - BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator()); - if (CompletelyUnroll) { - BranchInst::Create(LoopExit, Term); - Term->eraseFromParent(); - } else { - Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]); - } - updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0], - SubLoopBlocksLast.back()); - - for (unsigned It = 1; It != Count; It++) { - // Replace the conditional branch of the previous iteration subloop with an - // unconditional one to this one - BranchInst *AftTerm = - cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator()); - BranchInst::Create(AftBlocksFirst[It], AftTerm); - AftTerm->eraseFromParent(); - - updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It], - SubLoopBlocksLast.back()); - movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); - } - - // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the - // new ones required. - if (Count != 1) { - SmallVector<DominatorTree::UpdateType, 4> DTUpdates; - DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0], - SubLoopBlocksFirst[0]); - DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, - SubLoopBlocksLast[0], AftBlocksFirst[0]); - - DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, - ForeBlocksLast.back(), SubLoopBlocksFirst[0]); - DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, - SubLoopBlocksLast.back(), AftBlocksFirst[0]); - DT->applyUpdates(DTUpdates); - } - - // Merge adjacent basic blocks, if possible. - SmallPtrSet<BasicBlock *, 16> MergeBlocks; - MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); - MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); - MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - while (!MergeBlocks.empty()) { - BasicBlock *BB = *MergeBlocks.begin(); - BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); - if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) { - BasicBlock *Dest = Term->getSuccessor(0); - BasicBlock *Fold = Dest->getUniquePredecessor(); - if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { - // Don't remove BB and add Fold as they are the same BB - assert(Fold == BB); - (void)Fold; - MergeBlocks.erase(Dest); - } else - MergeBlocks.erase(BB); - } else - MergeBlocks.erase(BB); - } - - // At this point, the code is well formed. 
We now do a quick sweep over the
-  // inserted code, doing constant propagation and dead code elimination as we
-  // go.
-  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
-  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);
-
-  NumCompletelyUnrolledAndJammed += CompletelyUnroll;
-  ++NumUnrolledAndJammed;
-
-#ifndef NDEBUG
-  // We shouldn't have done anything to break loop simplify form or LCSSA.
-  Loop *OuterL = L->getParentLoop();
-  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
-  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
-  if (!CompletelyUnroll)
-    assert(L->isLoopSimplifyForm());
-  assert(SubLoop->isLoopSimplifyForm());
-  assert(DT->verify());
-#endif
-
-  // Update LoopInfo if the loop is completely removed.
-  if (CompletelyUnroll)
-    LI->erase(L);
-
-  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
-                          : LoopUnrollResult::PartiallyUnrolled;
-}
-
-static bool getLoadsAndStores(BasicBlockSet &Blocks,
-                              SmallVector<Value *, 4> &MemInstr) {
-  // Scan the BBs and collect legal loads and stores.
-  // Returns false if non-simple loads/stores are found.
-  for (BasicBlock *BB : Blocks) {
-    for (Instruction &I : *BB) {
-      if (auto *Ld = dyn_cast<LoadInst>(&I)) {
-        if (!Ld->isSimple())
-          return false;
-        MemInstr.push_back(&I);
-      } else if (auto *St = dyn_cast<StoreInst>(&I)) {
-        if (!St->isSimple())
-          return false;
-        MemInstr.push_back(&I);
-      } else if (I.mayReadOrWriteMemory()) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
-                              SmallVector<Value *, 4> &Later,
-                              unsigned LoopDepth, bool InnerLoop,
-                              DependenceInfo &DI) {
-  // Use DA to check for dependencies between loads and stores that make unroll
-  // and jam invalid.
-  for (Value *I : Earlier) {
-    for (Value *J : Later) {
-      Instruction *Src = cast<Instruction>(I);
-      Instruction *Dst = cast<Instruction>(J);
-      if (Src == Dst)
-        continue;
-      // Ignore Input dependencies.
-      if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
-        continue;
-
-      // Track dependencies, and if we find them take a conservative approach
-      // by allowing only = or < (not >), although some > would be safe
-      // (depending upon unroll width).
-      // For the inner loop, we need to disallow any (> <) dependencies.
-      // FIXME: Allow > so long as distance is less than unroll width
-      if (auto D = DI.depends(Src, Dst, true)) {
-        assert(D->isOrdered() && "Expected an output, flow or anti dep.");
-
-        if (D->isConfused()) {
-          LLVM_DEBUG(dbgs() << "  Confused dependency between:\n"
-                            << "  " << *Src << "\n"
-                            << "  " << *Dst << "\n");
-          return false;
-        }
-        if (!InnerLoop) {
-          if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
-            LLVM_DEBUG(dbgs() << "  > dependency between:\n"
-                              << "  " << *Src << "\n"
-                              << "  " << *Dst << "\n");
-            return false;
-          }
-        } else {
-          assert(LoopDepth + 1 <= D->getLevels());
-          if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
-              D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
-            LLVM_DEBUG(dbgs() << "  < > dependency between:\n"
-                              << "  " << *Src << "\n"
-                              << "  " << *Dst << "\n");
-            return false;
-          }
-        }
-      }
-    }
-  }
-  return true;
-}
-
-static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks,
-                              BasicBlockSet &SubLoopBlocks,
-                              BasicBlockSet &AftBlocks, DependenceInfo &DI) {
-  // Get all load/store pairs for each of the blocks.
-  SmallVector<Value *, 4> ForeMemInstr;
-  SmallVector<Value *, 4> SubLoopMemInstr;
-  SmallVector<Value *, 4> AftMemInstr;
-  if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) ||
-      !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) ||
-      !getLoadsAndStores(AftBlocks, AftMemInstr))
-    return false;
-
-  // Check for dependencies between any blocks that may change order.
-  unsigned LoopDepth = L->getLoopDepth();
-  return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false,
-                           DI) &&
-         checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) &&
-         checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false,
-                           DI) &&
-         checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true,
-                           DI);
-}
-
-bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
-                                DependenceInfo &DI) {
-  /* We currently handle outer loops like this:
-        |
-    ForeFirst    <----\        }
-     Blocks           |        } ForeBlocks
-    ForeLast          |        }
-        |             |
-    SubLoopFirst  <\  |        }
-     Blocks        |  |        } SubLoopBlocks
-    SubLoopLast   -/  |        }
-        |             |
-    AftFirst          |        }
-     Blocks           |        } AftBlocks
-    AftLast     ------/        }
-        |
-
-    There can (in theory) be any number of blocks in ForeBlocks, SubLoopBlocks
-    and AftBlocks, provided that there is one edge from Fores to SubLoops,
-    one edge from SubLoops to Afts and a single outer loop exit (from Afts).
-    In practice we currently limit Aft blocks to a single block, and limit
-    things further in the profitability checks of the unroll and jam pass.
-
-    Because of the way we rearrange basic blocks, we also require that
-    the Fore blocks on all unrolled iterations are safe to move before the
-    SubLoop blocks of all iterations. So we require that the phi node looping
-    operands of ForeHeader can be moved to at least the end of ForeEnd, so that
-    we can arrange cloned Fore Blocks before the subloop and match up Phi's
-    correctly.
-
-    i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
-    It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
-
-    There are then a number of checks along the lines of no calls, no
-    exceptions, inner loop IV is consistent, etc. Note that for loops requiring
-    runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
-    UnrollAndJamLoop if the trip count cannot be easily calculated.
- */ - - if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1) - return false; - Loop *SubLoop = L->getSubLoops()[0]; - if (!SubLoop->isLoopSimplifyForm()) - return false; - - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Exit = L->getExitingBlock(); - BasicBlock *SubLoopHeader = SubLoop->getHeader(); - BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); - BasicBlock *SubLoopExit = SubLoop->getExitingBlock(); - - if (Latch != Exit) - return false; - if (SubLoopLatch != SubLoopExit) - return false; - - if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n"); - return false; - } - - // Split blocks into Fore/SubLoop/Aft based on dominators - BasicBlockSet SubLoopBlocks; - BasicBlockSet ForeBlocks; - BasicBlockSet AftBlocks; - if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, - AftBlocks, &DT)) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n"); - return false; - } - - // Aft blocks may need to move instructions to fore blocks, which becomes more - // difficult if there are multiple (potentially conditionally executed) - // blocks. For now we just exclude loops with multiple aft blocks. - if (AftBlocks.size() != 1) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle " - "multiple blocks after the loop\n"); - return false; - } - - // Check inner loop backedge count is consistent on all iterations of the - // outer loop - if (!hasIterationCountInvariantInParent(SubLoop, SE)) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is " - "not consistent on each iteration\n"); - return false; - } - - // Check the loop safety info for exceptions. - SimpleLoopSafetyInfo LSI; - LSI.computeLoopSafetyInfo(L); - if (LSI.anyBlockMayThrow()) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n"); - return false; - } - - // We've ruled out the easy stuff and now need to check that there are no - // interdependencies which may prevent us from moving the: - // ForeBlocks before Subloop and AftBlocks. - // Subloop before AftBlocks. - // ForeBlock phi operands before the subloop - - // Make sure we can move all instructions we need to before the subloop - if (!processHeaderPhiOperands( - Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) { - if (SubLoop->contains(I->getParent())) - return false; - if (AftBlocks.count(I->getParent())) { - // If we hit a phi node in afts we know we are done (probably - // LCSSA) - if (isa<PHINode>(I)) - return false; - // Can't move instructions with side effects or memory - // reads/writes - if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()) - return false; - } - // Keep going - return true; - })) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required " - "instructions after subloop to before it\n"); - return false; - } - - // Check for memory dependencies which prohibit the unrolling we are doing. - // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check - // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub. 
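One concrete pattern this dependency checking must reject (an illustrative example, not taken from the pass): Aft(I) feeding Fore(I+1). Jamming would run Fore(I+1) before Aft(I), so the backward (>) direction has to be refused:

void notJammable(int *A, int N, int M) {
  for (int I = 1; I < N; ++I) {
    int T = A[I - 1];          // Fore(I): reads the value Aft(I-1) stored
    for (int J = 0; J < M; ++J)
      T += J;                  // SubLoop(I, J)
    A[I] = T;                  // Aft(I): feeds Fore(I+1)
  }
}
// Unroll-and-jam by 2 would reorder this to Fore(I), Fore(I+1), SubLoop(I),
// SubLoop(I+1), Aft(I), Aft(I+1), so Fore(I+1) would read A[I] before Aft(I)
// writes it.

The checkDependencies call below is what catches this case.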
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) { - LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n"); - return false; - } - - return true; -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp deleted file mode 100644 index 005306cf1898..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ /dev/null @@ -1,744 +0,0 @@ -//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements some loop unrolling utilities for peeling loops -// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for -// unrolling loops with compile-time constant trip counts. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <limits> - -using namespace llvm; -using namespace llvm::PatternMatch; - -#define DEBUG_TYPE "loop-unroll" - -STATISTIC(NumPeeled, "Number of loops peeled"); - -static cl::opt<unsigned> UnrollPeelMaxCount( - "unroll-peel-max-count", cl::init(7), cl::Hidden, - cl::desc("Max average trip count which will cause loop peeling.")); - -static cl::opt<unsigned> UnrollForcePeelCount( - "unroll-force-peel-count", cl::init(0), cl::Hidden, - cl::desc("Force a peel count regardless of profiling information.")); - -static cl::opt<bool> UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, - cl::desc("Allow peeling of loops with multiple deopt exits.")); - -// Designates that a Phi is estimated to become invariant after an "infinite" -// number of loop iterations (i.e. only may become an invariant if the loop is -// fully unrolled). -static const unsigned InfiniteIterationsToInvariance = - std::numeric_limits<unsigned>::max(); - -// Check whether we are capable of peeling this loop. 
-bool llvm::canPeel(Loop *L) {
-  // Make sure the loop is in simplified form.
-  if (!L->isLoopSimplifyForm())
-    return false;
-
-  if (UnrollPeelMultiDeoptExit) {
-    SmallVector<BasicBlock *, 4> Exits;
-    L->getUniqueNonLatchExitBlocks(Exits);
-
-    if (!Exits.empty()) {
-      // The latch's terminator is a conditional branch, the latch is exiting,
-      // and all non-latch exits end up in a deoptimize call.
-      const BasicBlock *Latch = L->getLoopLatch();
-      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
-      return T && T->isConditional() && L->isLoopExiting(Latch) &&
-             all_of(Exits, [](const BasicBlock *BB) {
-               return BB->getTerminatingDeoptimizeCall();
-             });
-    }
-  }
-
-  // Only peel loops that contain a single exit.
-  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
-    return false;
-
-  // Don't try to peel loops where the latch is not the exiting block.
-  // This can be an indication of two different things:
-  // 1) The loop is not rotated.
-  // 2) The loop contains irreducible control flow that involves the latch.
-  if (L->getLoopLatch() != L->getExitingBlock())
-    return false;
-
-  return true;
-}
-
-// This function calculates the number of iterations after which the given Phi
-// becomes an invariant. The pre-calculated values are memoized in the map. The
-// function (abbreviated I below) is calculated according to the following
-// definition:
-// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
-// If %y is a loop invariant, then I(%x) = 1.
-// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
-// Otherwise, I(%x) is infinite.
-// TODO: Actually if %y is an expression that depends only on Phi %z and some
-//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
-//       looks like:
-//       %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
-//       %y = phi(0, 5),
-//       %a = %y + 1.
-static unsigned calculateIterationsToInvariance(
-    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
-    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
-  assert(Phi->getParent() == L->getHeader() &&
-         "Non-loop Phi should not be checked for turning into invariant.");
-  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
-  // If we already know the answer, take it from the map.
-  auto I = IterationsToInvariance.find(Phi);
-  if (I != IterationsToInvariance.end())
-    return I->second;
-
-  // Otherwise we need to analyze the input from the back edge.
-  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
-  // Place infinity in the map to avoid infinite recursion for cyclic Phis.
-  // Such cycles can never settle on an invariant.
-  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
-  unsigned ToInvariance = InfiniteIterationsToInvariance;
-
-  if (L->isLoopInvariant(Input))
-    ToInvariance = 1u;
-  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
-    // Only consider Phis in the header block.
-    if (IncPhi->getParent() != L->getHeader())
-      return InfiniteIterationsToInvariance;
-    // If the input becomes an invariant after X iterations, then our Phi
-    // becomes an invariant after X + 1 iterations.
-    unsigned InputToInvariance = calculateIterationsToInvariance(
-        IncPhi, L, BackEdge, IterationsToInvariance);
-    if (InputToInvariance != InfiniteIterationsToInvariance)
-      ToInvariance = InputToInvariance + 1u;
-  }
-
-  // If we found that this Phi lies in an invariant chain, update the map.
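The memoized recursion above can be modeled standalone; a hedged C++ sketch with a toy Phi type (not the LLVM classes):

#include <limits>
#include <unordered_map>

constexpr unsigned InfIters = std::numeric_limits<unsigned>::max();

struct Phi {
  const Phi *LatchInputPhi;  // non-null if the latch input is a header PHI
  bool InputIsInvariant;     // true if the latch input is loop-invariant
};

unsigned iterationsToInvariance(
    const Phi *P, std::unordered_map<const Phi *, unsigned> &Memo) {
  if (auto It = Memo.find(P); It != Memo.end())
    return It->second;
  Memo[P] = InfIters;  // break PHI cycles: they never become invariant
  unsigned Result = InfIters;
  if (P->InputIsInvariant)
    Result = 1;
  else if (P->LatchInputPhi) {
    unsigned In = iterationsToInvariance(P->LatchInputPhi, Memo);
    if (In != InfIters)
      Result = In + 1;
  }
  if (Result != InfIters)
    Memo[P] = Result;
  return Result;
}

The two lines that follow in the real function perform exactly this final memo update.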
- if (ToInvariance != InfiniteIterationsToInvariance) - IterationsToInvariance[Phi] = ToInvariance; - return ToInvariance; -} - -// Return the number of iterations to peel off that make conditions in the -// body true/false. For example, if we peel 2 iterations off the loop below, -// the condition i < 2 can be evaluated at compile time. -// for (i = 0; i < n; i++) { -// if (i < 2) -// .. -// else -// .. -// } -static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, - ScalarEvolution &SE) { - assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); - unsigned DesiredPeelCount = 0; - - for (auto *BB : L.blocks()) { - auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); - if (!BI || BI->isUnconditional()) - continue; - - // Ignore loop exit condition. - if (L.getLoopLatch() == BB) - continue; - - Value *Condition = BI->getCondition(); - Value *LeftVal, *RightVal; - CmpInst::Predicate Pred; - if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) - continue; - - const SCEV *LeftSCEV = SE.getSCEV(LeftVal); - const SCEV *RightSCEV = SE.getSCEV(RightVal); - - // Do not consider predicates that are known to be true or false - // independently of the loop iteration. - if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || - SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, - RightSCEV)) - continue; - - // Check if we have a condition with one AddRec and one non-AddRec - // expression. Normalize LeftSCEV to be the AddRec. - if (!isa<SCEVAddRecExpr>(LeftSCEV)) { - if (isa<SCEVAddRecExpr>(RightSCEV)) { - std::swap(LeftSCEV, RightSCEV); - Pred = ICmpInst::getSwappedPredicate(Pred); - } else - continue; - } - - const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); - - // Avoid huge SCEV computations in the loop below, make sure we only - // consider AddRecs of the loop we are trying to peel and avoid - // non-monotonic predicates, as we will not be able to simplify the loop - // body. - // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can - // simplify the loop, if we peel 1 additional iteration, if there - // is no wrapping. - bool Increasing; - if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || - !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) - continue; - (void)Increasing; - - // Check if extending the current DesiredPeelCount lets us evaluate Pred - // or !Pred in the loop body statically. - unsigned NewPeelCount = DesiredPeelCount; - - const SCEV *IterVal = LeftAR->evaluateAtIteration( - SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); - - // If the original condition is not known, get the negated predicate - // (which holds on the else branch) and check if it is known. This allows - // us to peel off iterations that make the original condition false. - if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) - Pred = ICmpInst::getInversePredicate(Pred); - - const SCEV *Step = LeftAR->getStepRecurrence(SE); - while (NewPeelCount < MaxPeelCount && - SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { - IterVal = SE.getAddExpr(IterVal, Step); - NewPeelCount++; - } - - // Only peel the loop if the monotonic predicate !Pred becomes known in the - // first iteration of the loop body after peeling. - if (NewPeelCount > DesiredPeelCount && - SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, - RightSCEV)) - DesiredPeelCount = NewPeelCount; - } - - return DesiredPeelCount; -}
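// Two hedged, standalone sketches of the analyses above (invented names and
// values; the real code works on LLVM IR, not C++ source). First, what
// countToEliminateCompares enables at the source level: peeling two
// iterations makes the `i < 2` compare a constant in the remaining loop.
#include <limits>
#include <map>
#include <string>

void f(); // hypothetical stand-ins for the two branch bodies
void g();

void originalLoop(int n) {
  for (int i = 0; i < n; ++i) {
    if (i < 2) f(); else g();  // condition depends only on the induction var
  }
}

void afterPeelingTwice(int n) {
  if (n > 0) f();              // i == 0: (0 < 2) folds to true
  if (n > 1) f();              // i == 1: (1 < 2) folds to true
  for (int i = 2; i < n; ++i)
    g();                       // (i < 2) folds to false for every i >= 2
}

// Second, a toy model of calculateIterationsToInvariance's memoized
// recursion: each header phi maps to the name it receives from the back
// edge, with "inv" marking a loop-invariant input.
static const unsigned InfiniteIters = std::numeric_limits<unsigned>::max();

unsigned itersToInvariance(const std::string &Phi,
                           const std::map<std::string, std::string> &BackIn,
                           std::map<std::string, unsigned> &Memo) {
  auto It = Memo.find(Phi);
  if (It != Memo.end())
    return It->second;
  Memo[Phi] = InfiniteIters;   // break cycles: a phi cycle never settles
  const std::string &In = BackIn.at(Phi);
  unsigned R = InfiniteIters;
  if (In == "inv")
    R = 1;                     // invariant back-edge input
  else if (BackIn.count(In)) {
    unsigned D = itersToInvariance(In, BackIn, Memo);
    if (D != InfiniteIters)
      R = D + 1;               // one iteration more than the feeding phi
  }
  if (R != InfiniteIters)
    Memo[Phi] = R;
  return R;
}
// E.g. with {{"y", "inv"}, {"x", "y"}}: I(y) == 1 and I(x) == 2.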
-// Return the number of iterations we want to peel off. -void llvm::computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount, ScalarEvolution &SE) { - assert(LoopSize > 0 && "Zero loop size is not allowed!"); - // Save the UP.PeelCount value set by the target in - // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. - unsigned TargetPeelCount = UP.PeelCount; - UP.PeelCount = 0; - if (!canPeel(L)) - return; - - // Only try to peel innermost loops. - if (!L->empty()) - return; - - // If the user provided a peel count, use that. - bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; - if (UserPeelCount) { - LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount - << " iterations.\n"); - UP.PeelCount = UnrollForcePeelCount; - return; - } - - // Skip peeling if it's disabled. - if (!UP.AllowPeeling) - return; - - // Here we try to get rid of Phis which become invariants after 1, 2, ..., N - // iterations of the loop. For this we compute the number of iterations after - // which every Phi is guaranteed to become an invariant, and try to peel the - // maximum number of iterations among these values, thus turning all those - // Phis into invariants. - // First, check that we can peel at least one iteration. - if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { - // Store the pre-calculated values here. - SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; - // Now go through all Phis to calculate the number of iterations they - // need to become invariants. - // Start the max computation with the UP.PeelCount value set by the target - // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. - unsigned DesiredPeelCount = TargetPeelCount; - BasicBlock *BackEdge = L->getLoopLatch(); - assert(BackEdge && "Loop is not in simplified form?"); - for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { - PHINode *Phi = cast<PHINode>(&*BI); - unsigned ToInvariance = calculateIterationsToInvariance( - Phi, L, BackEdge, IterationsToInvariance); - if (ToInvariance != InfiniteIterationsToInvariance) - DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); - } - - // Respect the limits implied by the loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); - - DesiredPeelCount = std::max(DesiredPeelCount, - countToEliminateCompares(*L, MaxPeelCount, SE)); - - if (DesiredPeelCount > 0) { - DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); - // Consider max peel count limitation. - assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; - } - } - - // Bail if we know the statically calculated trip count. - // In that case we would rather do partial unrolling. - if (TripCount) - return;
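// A hedged numeric sketch of the size budget applied above (standalone;
// Threshold and LoopSize are invented sample values, UnrollPeelMaxCount
// defaults to 7 per the flag at the top of this file). Peeling K iterations
// keeps K + 1 copies of the body, so K is capped at Threshold / LoopSize - 1.
#include <algorithm>
#include <cassert>

int main() {
  unsigned Threshold = 300, LoopSize = 50, UnrollPeelMaxCount = 7;
  assert(2 * LoopSize <= Threshold); // can afford at least one peeled copy
  unsigned MaxPeelCount =
      std::min(UnrollPeelMaxCount, Threshold / LoopSize - 1); // == 5
  assert(LoopSize * (MaxPeelCount + 1) <= Threshold);         // 50 * 6 == 300
  return 0;
}

 - // If we don't know the trip count, but have reason to believe the average - // trip count is low, peeling should be beneficial, since we will usually - // hit the peeled section. - // We only do this in the presence of profile information, since otherwise - // our estimates of the trip count are not reliable enough.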
- if (L->getHeader()->getParent()->hasProfileData()) { - Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L); - if (!PeelCount) - return; - - LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount - << "\n"); - - if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { - LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount - << " iterations.\n"); - UP.PeelCount = *PeelCount; - return; - } - LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); - LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); - LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) - << "\n"); - LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); - } - } -} - -/// Update the branch weights of the latch of a peeled-off loop -/// iteration. -/// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly. -/// Our goal is to make sure that: -/// a) The total weight of all the copies of the loop body is preserved. -/// b) The total weight of the loop exit is preserved. -/// c) The body weight is reasonably distributed between the peeled iterations. -/// -/// \param Header The copy of the header block that belongs to next iteration. -/// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param IterNumber The serial number of the iteration that was just -/// peeled off. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop -/// iterations that are unaccounted for. As an input, it represents the number -/// of times we expect to enter the header of the iteration currently being -/// peeled off. The output is the number of times we expect to enter the -/// header of the next iteration. -static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned IterNumber, unsigned AvgIters, - uint64_t &PeeledHeaderWeight) { - if (!PeeledHeaderWeight) - return; - // FIXME: Pick a more realistic distribution. - // Currently the proportion of weight we assign to the fall-through - // side of the branch drops linearly with the iteration number, and we use - // a 0.9 fudge factor to make the drop-off less sharp... - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; - - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(LatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); -} - -/// Initialize the weights. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. -/// \param[out] CurHeaderWeight The # of times the header is executed. -static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned AvgIters, uint64_t &ExitWeight, - uint64_t &CurHeaderWeight) { - uint64_t TrueWeight, FalseWeight; - if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) - return; - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - ExitWeight = HeaderIdx ? 
TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the # of times the - // exit edge is taken and the # of times the backedge is taken. - CurHeaderWeight = TrueWeight + FalseWeight; -} - -/// Update the weights of the original Latch block after peeling off all -/// iterations. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit block. -/// \param CurHeaderWeight The # of times the header is executed. -static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, uint64_t CurHeaderWeight) { - // Adjust the branch weights on the loop exit. - if (!ExitWeight) - return; - - // The backedge count is the difference of the current header weight and the - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; - MDBuilder MDB(LatchBR->getContext()); - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); -} - -/// Clones the body of the loop L, putting it between \p InsertTop and \p -/// InsertBot. -/// \param IterNumber The serial number of the iteration currently being -/// peeled off. -/// \param ExitEdges The exit edges of the original loop. -/// \param[out] NewBlocks A list of the blocks in the newly created clone. -/// \param[out] VMap The value map between the loop and the new clone. -/// \param LoopBlocks A helper for DFS-traversal of the loop. -/// \param LVMap A value-map that maps instructions from the original loop to -/// instructions in the last peeled-off iteration. -static void cloneLoopBlocks( - Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, - SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges, - SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, - ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, - LoopInfo *LI) { - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *PreHeader = L->getLoopPreheader(); - - Function *F = Header->getParent(); - LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); - LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *ParentLoop = L->getParentLoop(); - - // For each block in the original loop, create a new copy, - // and update the value map with the newly created values. - for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); - NewBlocks.push_back(NewBB); - - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, *LI); - - VMap[*BB] = NewBB; - - // If dominator tree is available, insert nodes to represent cloned blocks. - if (DT) { - if (Header == *BB) - DT->addNewBlock(NewBB, InsertTop); - else { - DomTreeNode *IDom = DT->getNode(*BB)->getIDom(); - // VMap must contain entry for IDom, as the iteration order is RPO. - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()])); - } - } - }
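// A hedged numeric sketch of updateBranchWeights' linear drop-off above
// (standalone; the starting weight of 1000 and AvgIters = 4 are invented).
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t PeeledHeaderWeight = 1000; // times we expect to enter iteration 0
  unsigned AvgIters = 4;
  for (unsigned IterNumber = 0; IterNumber < 2; ++IterNumber) {
    uint64_t FallThruWeight =
        PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
    uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
    PeeledHeaderWeight -= ExitWeight; // what flows into the next iteration
    // Prints 900/100 for iteration 0, then 607/293 for iteration 1.
    std::printf("iter %u: fallthru=%llu exit=%llu\n", IterNumber,
                (unsigned long long)FallThruWeight,
                (unsigned long long)ExitWeight);
  }
  return 0;
}

 - // Hook up the control flow for the newly inserted blocks.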
- // The new header is hooked up directly to the "top", which is either - // the original loop preheader (for the first iteration) or the previous - // iteration's exiting block (for every other iteration) - InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header])); - - // Similarly, for the latch: - // The original exiting edge is still hooked up to the loop exit. - // The backedge now goes to the "bottom", which is either the loop's real - // header (for the last peeled iteration) or the copied header of the next - // iteration (for every other iteration) - BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); - BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator()); - for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx) - if (LatchBR->getSuccessor(idx) == Header) { - LatchBR->setSuccessor(idx, InsertBot); - break; - } - if (DT) - DT->changeImmediateDominator(InsertBot, NewLatch); - - // The new copy of the loop body starts with a bunch of PHI nodes - // that pick an incoming value from either the preheader, or the previous - // loop iteration. Since this copy is no longer part of the loop, we - // resolve this statically: - // For the first iteration, we use the value from the preheader directly. - // For any other iteration, we replace the phi with the value generated by - // the immediately preceding clone of the loop body (which represents - // the previous iteration). - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - if (IterNumber == 0) { - VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); - } else { - Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - VMap[&*I] = LVMap[LatchInst]; - else - VMap[&*I] = LatchVal; - } - cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); - } - - // Fix up the outgoing values - we need to add a value for the iteration - // we've just created. Note that this must happen *after* the incoming - // values are adjusted, since the value going out of the latch may also be - // a value coming into the header. - for (auto Edge : ExitEdges) - for (PHINode &PHI : Edge.second->phis()) { - Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - LatchVal = VMap[LatchVal]; - PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); - } - - // LastValueMap is updated with the values for the current loop - // which are used the next time this function is called. - for (const auto &KV : VMap) - LVMap[KV.first] = KV.second; -} - -/// Peel off the first \p PeelCount iterations of loop \p L. -/// -/// Note that this does not peel them off as a single straight-line block. -/// Rather, each iteration is peeled off separately, and needs to check the -/// exit condition. -/// For loops that dynamically execute \p PeelCount iterations or less -/// this provides a benefit, since the peeled off iterations, which account -/// for the bulk of dynamic execution, can be further simplified by scalar -/// optimizations. 
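// A hedged source-level picture of the peeling just described (hypothetical
// rotated loop; block names echo the ".peel.begin"/".peel.newph" labels used
// by peelLoop below):
void body(int i);

void original(int n) {
  int i = 0;
  do { body(i); ++i; } while (i < n);   // rotated: the latch is the exit test
}

void peeledOnce(int n) {
  int i = 0;
  body(i); ++i;                         // "peel.begin": iteration 0, peeled off
  if (i < n) {                          // copy of the latch's exit condition
    do { body(i); ++i; } while (i < n); // original loop, entered via the
  }                                     // new preheader
}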
-bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA) { - assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); - assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); - - LoopBlocksDFS LoopBlocks(L); - LoopBlocks.perform(LI); - - BasicBlock *Header = L->getHeader(); - BasicBlock *PreHeader = L->getLoopPreheader(); - BasicBlock *Latch = L->getLoopLatch(); - SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; - L->getExitEdges(ExitEdges); - - DenseMap<BasicBlock *, BasicBlock *> ExitIDom; - if (DT) { - assert(L->hasDedicatedExits() && "No dedicated exits?"); - for (auto Edge : ExitEdges) { - if (ExitIDom.count(Edge.second)) - continue; - BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); - assert(L->contains(BB) && "IDom is not in a loop"); - ExitIDom[Edge.second] = BB; - } - } - - Function *F = Header->getParent(); - - // Set up all the necessary basic blocks. It is convenient to split the - // preheader into 3 parts - two blocks to anchor the peeled copy of the loop - // body, and a new preheader for the "real" loop. - - // Peeling the first iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // Each following iteration will split the current bottom anchor in two, - // and put the new copy of the loop body between these two blocks. That is, - // after peeling another iteration from the example above, we'll split - // InsertBot, and get: - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // LoopBody - // If (!cond) goto Exit - // InsertBot.next: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - - BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI); - BasicBlock *InsertBot = - SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI); - BasicBlock *NewPreHeader = - SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); - - InsertTop->setName(Header->getName() + ".peel.begin"); - InsertBot->setName(Header->getName() + ".peel.next"); - NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); - - ValueToValueMapTy LVMap; - - // If we have branch weight information, we'll want to update it for the - // newly created branches. - BranchInst *LatchBR = - cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); - uint64_t ExitWeight = 0, CurHeaderWeight = 0; - initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); - - // For each peeled-off iteration, make a copy of the loop. - for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { - SmallVector<BasicBlock *, 8> NewBlocks; - ValueToValueMapTy VMap; - - // Subtract the exit weight from the current header weight -- the exit - // weight is exactly the weight of the previous iteration's header. - // FIXME: due to the way the distribution is constructed, we need a - // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < CurHeaderWeight) - CurHeaderWeight -= ExitWeight; - else - CurHeaderWeight = 1; - - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, - LoopBlocks, VMap, LVMap, DT, LI); - - // Remap to use values from the current iteration instead of the - // previous one. 
- remapInstructionsInBlocks(NewBlocks, VMap); - - if (DT) { - // Latches of the cloned loops dominate the loop exit, so the idom of the - // latter is the first cloned loop body, just as the original PreHeader - // dominates the original loop body. - if (Iter == 0) - for (auto Exit : ExitIDom) - DT->changeImmediateDominator(Exit.first, - cast<BasicBlock>(LVMap[Exit.second])); -#ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); -#endif - } - - auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, Iter, - PeelCount, ExitWeight); - // Remove Loop metadata from the latch branch instruction - // because it is not the Loop's latch branch anymore. - LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); - - InsertTop = InsertBot; - InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI); - InsertBot->setName(Header->getName() + ".peel.next"); - - F->getBasicBlockList().splice(InsertTop->getIterator(), - F->getBasicBlockList(), - NewBlocks[0]->getIterator(), F->end()); - } - - // Now adjust the phi nodes in the loop header to get their initial values - // from the last peeled-off iteration instead of the preheader. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - Value *NewVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(NewVal); - if (LatchInst && L->contains(LatchInst)) - NewVal = LVMap[LatchInst]; - - PHI->setIncomingValueForBlock(NewPreHeader, NewVal); - } - - fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); - - if (Loop *ParentLoop = L->getParentLoop()) - L = ParentLoop; - - // We modified the loop, update SE. - SE->forgetTopmostLoop(L); - - // Finally, the DomTree must be correct. - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); - - // FIXME: Incrementally update loop-simplify - simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA); - - NumPeeled++; - - return true; -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp deleted file mode 100644 index d22fdb4d52dc..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ /dev/null @@ -1,958 +0,0 @@ -//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements some loop unrolling utilities for loops with run-time -// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time -// trip counts. -// -// The functions in this file are used to generate extra code when the -// run-time trip count modulo the unroll factor is not 0. When this is the -// case, we need to generate code to execute these 'left over' iterations. -// -// The current strategy generates an if-then-else sequence prior to the -// unrolled loop to execute the 'left over' iterations before or after the -// unrolled loop.
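// A hedged source-level sketch of the two remainder placements (hypothetical
// loop, unroll factor 4, n >= 0 assumed; the real transform operates on IR):
void body(int i);

void prologStyle(int n) {
  int i = 0;
  for (int extra = n % 4; i < extra; ++i)
    body(i);                      // 'left over' iterations run first
  for (; i < n; i += 4) {         // remaining trip count is a multiple of 4
    body(i); body(i + 1); body(i + 2); body(i + 3);
  }
}

void epilogStyle(int n) {
  int i = 0;
  for (; i + 4 <= n; i += 4) {    // unrolled-by-4 main loop runs first
    body(i); body(i + 1); body(i + 2); body(i + 3);
  }
  for (; i < n; ++i)
    body(i);                      // 'left over' iterations run last
}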
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -#include <algorithm> - -using namespace llvm; - -#define DEBUG_TYPE "loop-unroll" - -STATISTIC(NumRuntimeUnrolled, - "Number of loops unrolled with run-time trip counts"); -static cl::opt<bool> UnrollRuntimeMultiExit( - "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, - cl::desc("Allow runtime unrolling for loops with multiple exits, when " - "epilog is generated")); - -/// Connect the unrolling prolog code to the original loop. -/// The unrolling prolog code contains code to execute the -/// 'extra' iterations if the run-time trip count modulo the -/// unroll count is non-zero. -/// -/// This function performs the following: -/// - Create PHI nodes at prolog end block to combine values -/// that exit the prolog code and jump around the prolog. -/// - Add a PHI operand to a PHI node at the loop exit block -/// for values that exit the prolog and go around the loop. -/// - Branch around the original loop if the trip count is less -/// than the unroll factor. -/// -static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, - BasicBlock *OriginalLoopLatchExit, - BasicBlock *PreHeader, BasicBlock *NewPreHeader, - ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { - // Loop structure should be the following: - // Preheader - // PrologHeader - // ... - // PrologLatch - // PrologExit - // NewPreheader - // Header - // ... - // Latch - // LatchExit - BasicBlock *Latch = L->getLoopLatch(); - assert(Latch && "Loop must have a latch"); - BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); - - // Create a PHI node for each outgoing value from the original loop - // (which means it is an outgoing value from the prolog code too). - // The new PHI node is inserted in the prolog end basic block. - // The new PHI node value is added as an operand of a PHI node in either - // the loop header or the loop exit block. - for (BasicBlock *Succ : successors(Latch)) { - for (PHINode &PN : Succ->phis()) { - // Add a new PHI node to the prolog end block and add the - // appropriate incoming values. - // TODO: This code assumes that the PrologExit (or the LatchExit block for - // prolog loop) contains only one predecessor from the loop, i.e. the - // PrologLatch. When supporting multiple-exiting block loops, we can have - // two or more blocks that have the LatchExit as the target in the - // original loop. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", - PrologExit->getFirstNonPHI()); - // Adding a value to the new PHI node from the original loop preheader. - // This is the value that skips all the prolog code. - if (L->contains(&PN)) { - // Succ is loop header. 
- NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), - PreHeader); - } else { - // Succ is LatchExit. - NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); - } - - Value *V = PN.getIncomingValueForBlock(Latch); - if (Instruction *I = dyn_cast<Instruction>(V)) { - if (L->contains(I)) { - V = VMap.lookup(I); - } - } - // Adding a value to the new PHI node from the last prolog block - // that was created. - NewPN->addIncoming(V, PrologLatch); - - // Update the existing PHI node operand with the value from the - // new PHI node. How this is done depends on if the existing - // PHI node is in the original loop block, or the exit block. - if (L->contains(&PN)) - PN.setIncomingValueForBlock(NewPreHeader, NewPN); - else - PN.addIncoming(NewPN, PrologExit); - } - } - - // Make sure that created prolog loop is in simplified form - SmallVector<BasicBlock *, 4> PrologExitPreds; - Loop *PrologLoop = LI->getLoopFor(PrologLatch); - if (PrologLoop) { - for (BasicBlock *PredBB : predecessors(PrologExit)) - if (PrologLoop->contains(PredBB)) - PrologExitPreds.push_back(PredBB); - - SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, - nullptr, PreserveLCSSA); - } - - // Create a branch around the original loop, which is taken if there are no - // iterations remaining to be executed after running the prologue. - Instruction *InsertPt = PrologExit->getTerminator(); - IRBuilder<> B(InsertPt); - - assert(Count != 0 && "nonsensical Count!"); - - // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) - // This means %xtraiter is (BECount + 1) and all of the iterations of this - // loop were executed by the prologue. Note that if BECount <u (Count - 1) - // then (BECount + 1) cannot unsigned-overflow. - Value *BrLoopExit = - B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - // Split the exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); - SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, - nullptr, PreserveLCSSA); - // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); - InsertPt->eraseFromParent(); - if (DT) - DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); -} - -/// Connect the unrolling epilog code to the original loop. -/// The unrolling epilog code contains code to execute the -/// 'extra' iterations if the run-time trip count modulo the -/// unroll count is non-zero. -/// -/// This function performs the following: -/// - Update PHI nodes at the unrolling loop exit and epilog loop exit -/// - Create PHI nodes at the unrolling loop exit to combine -/// values that exit the unrolling loop code and jump around it. -/// - Update PHI operands in the epilog loop by the new PHI nodes -/// - Branch around the epilog loop if extra iters (ModVal) is zero. -/// -static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, - BasicBlock *Exit, BasicBlock *PreHeader, - BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, - ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { - BasicBlock *Latch = L->getLoopLatch(); - assert(Latch && "Loop must have a latch"); - BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); - - // Loop structure should be the following: - // - // PreHeader - // NewPreHeader - // Header - // ... - // Latch - // NewExit (PN) - // EpilogPreHeader - // EpilogHeader - // ... 
- // EpilogLatch - // Exit (EpilogPN) - - // Update PHI nodes at NewExit and Exit. - for (PHINode &PN : NewExit->phis()) { - // PN should be used in another PHI located in the Exit block, as - // Exit was split by SplitBlockPredecessors into Exit and NewExit. - // Basically it should look like: - // NewExit: - // PN = PHI [I, Latch] - // ... - // Exit: - // EpilogPN = PHI [PN, EpilogPreHeader] - // - // The incoming block is EpilogPreHeader instead of NewExit, as - // NewExit was split one more time to get EpilogPreHeader. - assert(PN.hasOneUse() && "The phi should have 1 use"); - PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser()); - assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); - - // Add incoming PreHeader from branch around the Loop - PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); - - Value *V = PN.getIncomingValueForBlock(Latch); - Instruction *I = dyn_cast<Instruction>(V); - if (I && L->contains(I)) - // If the value comes from an instruction in the loop, use the VMap value. - V = VMap.lookup(I); - // For an instruction defined outside the loop, a constant, or an undef - // value, use the value itself. - EpilogPN->addIncoming(V, EpilogLatch); - - assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && - "EpilogPN should have EpilogPreHeader incoming block"); - // Change EpilogPreHeader incoming block to NewExit. - EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), - NewExit); - // Now PHIs should look like: - // NewExit: - // PN = PHI [I, Latch], [undef, PreHeader] - // ... - // Exit: - // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] - } - - // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). - // Update corresponding PHI nodes in epilog loop. - for (BasicBlock *Succ : successors(Latch)) { - // Skip this as we already updated phis in exit blocks. - if (!L->contains(Succ)) - continue; - for (PHINode &PN : Succ->phis()) { - // Add new PHI nodes to the loop exit block and update epilog - // PHIs with the new PHI values. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", - NewExit->getFirstNonPHI()); - // Adding a value to the new PHI node from the unrolling loop preheader. - NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); - // Adding a value to the new PHI node from the unrolling loop latch. - NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); - - // Update the existing PHI node operand with the value from the new PHI - // node. The corresponding instruction in the epilog loop should be a PHI. - PHINode *VPN = cast<PHINode>(VMap[&PN]); - VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN); - } - } - - Instruction *InsertPt = NewExit->getTerminator(); - IRBuilder<> B(InsertPt); - Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); - assert(Exit && "Loop must have a single exit block only"); - // Split the epilogue exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, - PreserveLCSSA); - // Add the branch to the exit block (around the unrolling loop) - B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); - InsertPt->eraseFromParent(); - if (DT) - DT->changeImmediateDominator(Exit, NewExit); - - // Split the main loop exit to maintain canonicalization guarantees.
- SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; - SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, - PreserveLCSSA); -} - -/// Create a clone of the blocks in a loop and connect them together. -/// If CreateRemainderLoop is false, loop structure will not be cloned; -/// otherwise a new loop will be created, including all cloned blocks, and -/// its induction variable counts NewIter down to 0. -/// The cloned blocks should be inserted between InsertTop and InsertBot. -/// If the loop structure is cloned, InsertTop should be the new preheader -/// and InsertBot the new loop exit. -/// Return the new cloned loop that is created when CreateRemainderLoop is true. -static Loop * -CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, - const bool UseEpilogRemainder, const bool UnrollRemainder, - BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *Preheader, - std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, - ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { - StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - Function *F = Header->getParent(); - LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); - LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *ParentLoop = L->getParentLoop(); - NewLoopsMap NewLoops; - NewLoops[ParentLoop] = ParentLoop; - if (!CreateRemainderLoop) - NewLoops[L] = ParentLoop; - - // For each block in the original loop, create a new copy, - // and update the value map with the newly created values. - for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); - NewBlocks.push_back(NewBB); - - // If we're unrolling the outermost loop, there's no remainder loop, - // and this block isn't in a nested loop, then the new block is not - // in any loop. Otherwise, add it to LoopInfo. - if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) - addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); - - VMap[*BB] = NewBB; - if (Header == *BB) { - // For the first block, add a CFG connection to this newly - // created block. - InsertTop->getTerminator()->setSuccessor(0, NewBB); - } - - if (DT) { - if (Header == *BB) { - // The header is dominated by the preheader. - DT->addNewBlock(NewBB, InsertTop); - } else { - // Copy information from original loop to unrolled loop. - BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); - } - } - - if (Latch == *BB) { - // For the last block, if CreateRemainderLoop is false, create a direct - // jump to InsertBot. If not, create a loop back to the cloned head.
- VMap.erase((*BB)->getTerminator()); - BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); - BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); - IRBuilder<> Builder(LatchBR); - if (!CreateRemainderLoop) { - Builder.CreateBr(InsertBot); - } else { - PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, - suffix + ".iter", - FirstLoopBB->getFirstNonPHI()); - Value *IdxSub = - Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), - NewIdx->getName() + ".sub"); - Value *IdxCmp = - Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); - Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); - NewIdx->addIncoming(NewIter, InsertTop); - NewIdx->addIncoming(IdxSub, NewBB); - } - LatchBR->eraseFromParent(); - } - } - - // Change the incoming values to the ones defined in the preheader or - // cloned loop. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - if (!CreateRemainderLoop) { - if (UseEpilogRemainder) { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); - NewPHI->removeIncomingValue(Latch, false); - } else { - VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); - cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); - } - } else { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); - BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); - idx = NewPHI->getBasicBlockIndex(Latch); - Value *InVal = NewPHI->getIncomingValue(idx); - NewPHI->setIncomingBlock(idx, NewLatch); - if (Value *V = VMap.lookup(InVal)) - NewPHI->setIncomingValue(idx, V); - } - } - if (CreateRemainderLoop) { - Loop *NewLoop = NewLoops[L]; - assert(NewLoop && "L should have been cloned"); - MDNode *LoopID = NewLoop->getLoopID(); - - // Only add loop metadata if the loop is not going to be completely - // unrolled. - if (UnrollRemainder) - return NewLoop; - - Optional<MDNode *> NewLoopID = makeFollowupLoopID( - LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); - if (NewLoopID.hasValue()) { - NewLoop->setLoopID(NewLoopID.getValue()); - - // Do not setLoopAlreadyUnrolled if loop attributes have been defined - // explicitly. - return NewLoop; - } - - // Add unroll disable metadata to disable future unrolling for this loop. - NewLoop->setLoopAlreadyUnrolled(); - return NewLoop; - } - else - return nullptr; -} - -/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits -/// is populated with all the loop exit blocks other than the LatchExit block. -static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit, - bool PreserveLCSSA, - bool UseEpilogRemainder) { - - // We currently have some correctness constraints in unrolling a multi-exit - // loop. Check for these below. - - // We rely on LCSSA form being preserved when the exit blocks are transformed. - if (!PreserveLCSSA) - return false; - - // TODO: Support multiple exiting blocks jumping to the `LatchExit` when - // UnrollRuntimeMultiExit is true. This will need updating the logic in - // connectEpilog/connectProlog. - if (!LatchExit->getSinglePredecessor()) { - LLVM_DEBUG( - dbgs() << "Bailout for multi-exit handling when latch exit has >1 " - "predecessor.\n"); - return false; - } - // FIXME: We bail out of multi-exit unrolling when epilog loop is generated - // and L is an inner loop.
This is because in the presence of multiple exits, the - // outer loop is incorrect: we do not add the EpilogPreheader and exit to the - // outer loop. This is automatically handled in the prolog case, so we do not - // have that bug in prolog generation. - if (UseEpilogRemainder && L->getParentLoop()) - return false; - - // All constraints have been satisfied. - return true; -} - -/// Returns true if we can profitably unroll the multi-exit loop L. Currently, -/// we return true only if UnrollRuntimeMultiExit is set to true. -static bool canProfitablyUnrollMultiExitLoop( - Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, - bool PreserveLCSSA, bool UseEpilogRemainder) { - -#if !defined(NDEBUG) - assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, - UseEpilogRemainder) && - "Should be safe to unroll before checking profitability!"); -#endif - - // Priority goes to UnrollRuntimeMultiExit if it's supplied. - if (UnrollRuntimeMultiExit.getNumOccurrences()) - return UnrollRuntimeMultiExit; - - // The main pain point with multi-exit loop unrolling is that once unrolled, - // we will not be able to merge all blocks into straight-line code. - // There are branches within the unrolled loop that go to the OtherExits. - // The second point is the increase in code size, but this is true - // irrespective of multiple exits. - - // Note: Both the heuristics below are coarse grained. We are essentially - // enabling unrolling of loops that have a single side exit other than the - // normal LatchExit (i.e. exiting into a deoptimize block). - // The heuristics considered are: - // 1. low number of branches in the unrolled version. - // 2. high predictability of these extra branches. - // We avoid unrolling loops that have more than two exiting blocks. This - // limits the total number of branches in the unrolled loop to be at most - // the unroll factor (since one of the exiting blocks is the latch block). - SmallVector<BasicBlock*, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - if (ExitingBlocks.size() > 2) - return false; - - // The second heuristic is that L has one exit other than the LatchExit and - // that exit is a deoptimize block. We know that deoptimize blocks are rarely - // taken, which also implies the branch leading to the deoptimize block is - // highly predictable. - return (OtherExits.size() == 1 && - OtherExits[0]->getTerminatingDeoptimizeCall()); - // TODO: These can be fine-tuned further to consider code size or deopt states - // that are captured by the deoptimize exit block. - // Also, we can extend this to support more cases, if we actually - // know of kinds of multiexit loops that would benefit from unrolling. -} - -/// Insert code in the prolog/epilog code when unrolling a loop with a -/// run-time trip count. -/// -/// This method assumes that the loop unroll factor is the total number -/// of loop bodies in the loop after unrolling. (Some folks refer -/// to the unroll factor as the number of *extra* copies added). -/// We also assume that the loop unroll factor is a power-of-two. So, after -/// unrolling the loop, the number of loop bodies executed is 2, -/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch -/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for -/// the switch instruction is generated.
-/// -/// ***Prolog case*** -/// extraiters = tripcount % loopfactor -/// if (extraiters == 0) jump Loop: -/// else jump Prol: -/// Prol: LoopBody; -/// extraiters -= 1 // Omitted if unroll factor is 2. -/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. -/// if (tripcount < loopfactor) jump End: -/// Loop: -/// ... -/// End: -/// -/// ***Epilog case*** -/// extraiters = tripcount % loopfactor -/// if (tripcount < loopfactor) jump LoopExit: -/// unroll_iters = tripcount - extraiters -/// Loop: LoopBody; (executes unroll_iter times); -/// unroll_iter -= 1 -/// if (unroll_iter != 0) jump Loop: -/// LoopExit: -/// if (extraiters == 0) jump EpilExit: -/// Epil: LoopBody; (executes extraiters times) -/// extraiters -= 1 // Omitted if unroll factor is 2. -/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. -/// EpilExit: - -bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, - bool AllowExpensiveTripCount, - bool UseEpilogRemainder, - bool UnrollRemainder, bool ForgetAllSCEV, - LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, - bool PreserveLCSSA, Loop **ResultLoop) { - LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); - LLVM_DEBUG(L->dump()); - LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" - : dbgs() << "Using prolog remainder.\n"); - - // Make sure the loop is in canonical form. - if (!L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); - return false; - } - - // Guaranteed by LoopSimplifyForm. - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Header = L->getHeader(); - - BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - - if (!LatchBR || LatchBR->isUnconditional()) { - // The loop-rotate pass can be helpful to avoid this in many cases. - LLVM_DEBUG( - dbgs() - << "Loop latch not terminated by a conditional branch.\n"); - return false; - } - - unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; - BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); - - if (L->contains(LatchExit)) { - // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be an exit block out of the loop. - LLVM_DEBUG( - dbgs() - << "One of the loop latch successors must be the exit block.\n"); - return false; - } - - // These are exit blocks other than the target of the latch exiting block. - SmallVector<BasicBlock *, 4> OtherExits; - L->getUniqueNonLatchExitBlocks(OtherExits); - bool isMultiExitUnrollingEnabled = - canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, - UseEpilogRemainder) && - canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, - UseEpilogRemainder); - // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. - if (!isMultiExitUnrollingEnabled && - (!L->getExitingBlock() || OtherExits.size())) { - LLVM_DEBUG( - dbgs() - << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " - "enabled!\n"); - return false; - } - // Use Scalar Evolution to compute the trip count. This allows more loops to - // be unrolled than relying on induction var simplification. - if (!SE) - return false; - - // Only unroll loops with a computable trip count, and the trip count needs - // to be an int value (allowing a pointer type is a TODO item). - // We calculate the backedge count by using getExitCount on the Latch block, - // which is proven to be the only exiting block in this loop. 
This is the same as - // calculating getBackedgeTakenCount on the loop (which computes SCEV for all - // exiting blocks). - const SCEV *BECountSC = SE->getExitCount(L, Latch); - if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) { - LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); - return false; - } - - unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); - - // Add 1 since the backedge count doesn't include the first loop iteration. - const SCEV *TripCountSC = - SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa<SCEVCouldNotCompute>(TripCountSC)) { - LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); - return false; - } - - BasicBlock *PreHeader = L->getLoopPreheader(); - BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); - const DataLayout &DL = Header->getModule()->getDataLayout(); - SCEVExpander Expander(*SE, DL, "loop-unroll"); - if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { - LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); - return false; - } - - // This constraint lets us deal with an overflowing trip count easily; see the - // comment on ModVal below. - if (Log2_32(Count) > BEWidth) { - LLVM_DEBUG( - dbgs() - << "Count failed constraint on overflow trip count calculation.\n"); - return false; - } - - // Loop structure is the following: - // - // PreHeader - // Header - // ... - // Latch - // LatchExit - - BasicBlock *NewPreHeader; - BasicBlock *NewExit = nullptr; - BasicBlock *PrologExit = nullptr; - BasicBlock *EpilogPreHeader = nullptr; - BasicBlock *PrologPreHeader = nullptr; - - if (UseEpilogRemainder) { - // If epilog remainder: - // Split PreHeader to insert a branch around the loop for unrolling. - NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); - NewPreHeader->setName(PreHeader->getName() + ".new"); - // Split LatchExit to create phi nodes from branch above. - SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); - NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, - nullptr, PreserveLCSSA); - // NewExit gets its DebugLoc from LatchExit, which is not part of the - // original Loop. - // Fix this by setting NewExit's DebugLoc to that of the Loop header's - // terminator. - auto *NewExitTerminator = NewExit->getTerminator(); - NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); - // Split NewExit to insert epilog remainder loop. - EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); - EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); - } else { - // If prolog remainder: - // Split the original preheader twice to insert the prolog remainder loop. - PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); - PrologPreHeader->setName(Header->getName() + ".prol.preheader"); - PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), - DT, LI); - PrologExit->setName(Header->getName() + ".prol.loopexit"); - // Split PrologExit to get NewPreHeader. - NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); - NewPreHeader->setName(PreHeader->getName() + ".new"); - } - // Loop structure should be the following: - // Epilog Prolog - // - // PreHeader PreHeader - // *NewPreHeader *PrologPreHeader - // Header *PrologExit - // ... *NewPreHeader - // Latch Header - // *NewExit ...
- // *EpilogPreHeader Latch - // LatchExit LatchExit - - // Calculate conditions for branch around loop for unrolling - // in epilog case and around prolog remainder loop in prolog case. - // Compute the number of extra iterations required, which is: - // extra iterations = run-time trip count % loop unroll factor - PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); - Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), - PreHeaderBR); - Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), - PreHeaderBR); - IRBuilder<> B(PreHeaderBR); - Value *ModVal; - // Calculate ModVal = (BECount + 1) % Count. - // Note that TripCount is BECount + 1. - if (isPowerOf2_32(Count)) { - // When Count is a power of 2 we don't need BECount for the epilog case; - // however we'll need it for a branch around the unrolling loop in the - // prolog case. - ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); - // If ModVal is zero, we know that either - // 1. There are no iterations to be run in the prolog/epilog loop. - // OR - // 2. The addition computing TripCount overflowed. - // - // If (2) is true, we know that TripCount really is (1 << BEWidth) and so - // the number of iterations that remain to be run in the original loop is a - // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we - // explicitly check this above). - } else { - // As (BECount + 1) can potentially unsigned-overflow, we count - // (BECount % Count) + 1, which is overflow safe as BECount % Count < Count. - Value *ModValTmp = B.CreateURem(BECount, - ConstantInt::get(BECount->getType(), - Count)); - Value *ModValAdd = B.CreateAdd(ModValTmp, - ConstantInt::get(ModValTmp->getType(), 1)); - // At that point (BECount % Count) + 1 could be equal to Count. - // To handle this case we need to take mod by Count one more time. - ModVal = B.CreateURem(ModValAdd, - ConstantInt::get(BECount->getType(), Count), - "xtraiter"); - } - Value *BranchVal = - UseEpilogRemainder ? B.CreateICmpULT(BECount, - ConstantInt::get(BECount->getType(), - Count - 1)) : - B.CreateIsNotNull(ModVal, "lcmp.mod"); - BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; - BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; - // Branch to either the remainder (extra iterations) loop or the unrolling loop. - B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); - PreHeaderBR->eraseFromParent(); - if (DT) { - if (UseEpilogRemainder) - DT->changeImmediateDominator(NewExit, PreHeader); - else - DT->changeImmediateDominator(PrologExit, PreHeader); - } - Function *F = Header->getParent(); - // Get an ordered list of blocks in the loop to help with the ordering of the - // cloned blocks in the prolog/epilog code. - LoopBlocksDFS LoopBlocks(L); - LoopBlocks.perform(LI); - - // - // For each extra loop iteration, create a copy of the loop's basic blocks - // and generate a condition that branches to the copy depending on the - // number of 'left over' iterations. - // - std::vector<BasicBlock *> NewBlocks; - ValueToValueMapTy VMap; - - // For unroll factor 2 the remainder loop will have 1 iteration. - // Do not create a 1-iteration loop. - bool CreateRemainderLoop = (Count != 2); - - // Clone all the basic blocks in the loop. If Count is 2, we don't clone - // the loop, otherwise we create a cloned loop to execute the extra - // iterations. This function adds the appropriate CFG connections. - BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; - BasicBlock *InsertTop = UseEpilogRemainder ?
EpilogPreHeader : PrologPreHeader; - Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, - InsertTop, InsertBot, - NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); - - // Insert the cloned blocks into the function. - F->getBasicBlockList().splice(InsertBot->getIterator(), - F->getBasicBlockList(), - NewBlocks[0]->getIterator(), - F->end()); - - // Now the loop blocks are cloned and the other exiting blocks from the - // remainder are connected to the original Loop's exit blocks. The remaining - // work is to update the phi nodes in the original loop, and take in the - // values from the cloned region. - for (auto *BB : OtherExits) { - for (auto &II : *BB) { - - // Given we preserve LCSSA form, we know that the values used outside the - // loop will be used through these phi nodes at the exit blocks that are - // transformed below. - if (!isa<PHINode>(II)) - break; - PHINode *Phi = cast<PHINode>(&II); - unsigned oldNumOperands = Phi->getNumIncomingValues(); - // Add the incoming values from the remainder code to the end of the phi - // node. - for (unsigned i = 0; i < oldNumOperands; i++) { - Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); - // newVal can be a constant or derived from values outside the loop, and - // hence need not have a VMap value. Also, since lookup already generated - // a default "null" VMap entry for this value, we need to populate that - // VMap entry correctly, with the mapped entry being itself. - if (!newVal) { - newVal = Phi->getIncomingValue(i); - VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); - } - Phi->addIncoming(newVal, - cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); - } - } -#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) - for (BasicBlock *SuccBB : successors(BB)) { - assert(!(any_of(OtherExits, - [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || - SuccBB == LatchExit) && - "Breaks the definition of dedicated exits!"); - } -#endif - } - - // Update the immediate dominator of the exit blocks and blocks that are - // reachable from the exit blocks. This is needed because we now have paths - // from both the original loop and the remainder code reaching the exit - // blocks. While the IDom of these exit blocks was in the original loop, - // now the IDom is the preheader (which decides whether the original loop or - // remainder code should run). - if (DT && !L->getExitingBlock()) { - SmallVector<BasicBlock *, 16> ChildrenToUpdate; - // NB! We have to examine the dom children of all loop blocks, not just - // those which are the IDom of the exit blocks. This is because blocks - // reachable from the exit blocks can have their IDom as the nearest common - // dominator of the exit blocks. - for (auto *BB : L->blocks()) { - auto *DomNodeBB = DT->getNode(BB); - for (auto *DomChild : DomNodeBB->getChildren()) { - auto *DomChildBB = DomChild->getBlock(); - if (!L->contains(LI->getLoopFor(DomChildBB))) - ChildrenToUpdate.push_back(DomChildBB); - } - } - for (auto *BB : ChildrenToUpdate) - DT->changeImmediateDominator(BB, PreHeader); - } - - // Loop structure should be the following: - // Epilog Prolog - // - // PreHeader PreHeader - // NewPreHeader PrologPreHeader - // Header PrologHeader - // ... ... - // Latch PrologLatch - // NewExit PrologExit - // EpilogPreHeader NewPreHeader - // EpilogHeader Header - // ... ... - // EpilogLatch Latch - // LatchExit LatchExit
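// A hedged arithmetic check of the two "xtraiter" computations earlier in
// this function (standalone; the 8-bit widths and sample values are invented
// to make the overflow case visible).
#include <cassert>
#include <cstdint>

int main() {
  {
    uint8_t Count = 4;                 // power of two: mask form
    uint8_t BECount = 255;             // TripCount = BECount + 1 wraps to 0
    uint8_t TripCount = BECount + 1;
    uint8_t ModVal = TripCount & (Count - 1);
    assert(ModVal == 0);               // remaining iters are a multiple of 4
  }
  {
    uint8_t Count = 6;                 // not a power of two: double-urem form
    uint8_t BECount = 11;              // (11 % 6) + 1 == 6, then % 6 == 0
    uint8_t ModVal = ((BECount % Count) + 1) % Count;
    assert(ModVal == (uint8_t)((BECount + 1) % Count));
  }
  return 0;
}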
- for (BasicBlock *BB : NewBlocks) {
- for (Instruction &I : *BB) {
- RemapInstruction(&I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- }
- }
-
- if (UseEpilogRemainder) {
- // Connect the epilog code to the original loop and update the
- // PHI functions.
- ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
- EpilogPreHeader, NewPreHeader, VMap, DT, LI,
- PreserveLCSSA);
-
- // Update the counter in the loop for unrolling.
- // It should be a multiple of Count.
- IRBuilder<> B2(NewPreHeader->getTerminator());
- Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- B2.SetInsertPoint(LatchBR);
- PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
- Header->getFirstNonPHI());
- Value *IdxSub =
- B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".nsub");
- Value *IdxCmp;
- if (LatchBR->getSuccessor(0) == Header)
- IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
- else
- IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
- NewIdx->addIncoming(TestVal, NewPreHeader);
- NewIdx->addIncoming(IdxSub, Latch);
- LatchBR->setCondition(IdxCmp);
- } else {
- // Connect the prolog code to the original loop and update the
- // PHI functions.
- ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA);
- }
-
- // If this loop is nested, then the loop unroller changes the code in any
- // of its parent loops, so the Scalar Evolution pass needs to be run again.
- SE->forgetTopmostLoop(L);
-
- // Verify that the Dom Tree is correct.
-#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- if (DT)
- assert(DT->verify(DominatorTree::VerificationLevel::Full));
-#endif
-
- // Canonicalize to LoopSimplifyForm both original and remainder loops. We
- // cannot rely on the LoopUnrollPass to do this because it only does
- // canonicalization for parent/subloops and not the sibling loops.
- if (OtherExits.size() > 0) {
- // Generate dedicated exit blocks for the original loop, to preserve
- // LoopSimplifyForm.
- formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
- // Generate dedicated exit blocks for the remainder loop if one exists, to
- // preserve LoopSimplifyForm.
- if (remainderLoop)
- formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
- }
-
- auto UnrollResult = LoopUnrollResult::Unmodified;
- if (remainderLoop && UnrollRemainder) {
- LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollResult =
- UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
- }
-
- if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
- *ResultLoop = remainderLoop;
- NumRuntimeUnrolled++;
- return true;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
deleted file mode 100644
index ec226e65f650..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ /dev/null
@@ -1,976 +0,0 @@
-//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines common loop utility functions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-#define DEBUG_TYPE "loop-utils"
-
-static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
-
-bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
- MemorySSAUpdater *MSSAU,
- bool PreserveLCSSA) {
- bool Changed = false;
-
- // We re-use a vector for the in-loop predecessors.
- SmallVector<BasicBlock *, 4> InLoopPredecessors;
-
- auto RewriteExit = [&](BasicBlock *BB) {
- assert(InLoopPredecessors.empty() &&
- "Must start with an empty predecessors list!");
- auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
-
- // See if there are any non-loop predecessors of this exit block and
- // keep track of the in-loop predecessors.
- bool IsDedicatedExit = true;
- for (auto *PredBB : predecessors(BB))
- if (L->contains(PredBB)) {
- if (isa<IndirectBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from an indirectbr.
- return false;
- if (isa<CallBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from a callbr.
- return false;
-
- InLoopPredecessors.push_back(PredBB);
- } else {
- IsDedicatedExit = false;
- }
-
- assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
-
- // Nothing to do if this is already a dedicated exit.
- if (IsDedicatedExit)
- return false;
-
- auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
-
- if (!NewExitBB)
- LLVM_DEBUG(
- dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
- << *L << "\n");
- else
- LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewExitBB->getName() << "\n");
- return true;
- };
-
- // Walk the exit blocks directly rather than building up a data structure for
- // them, but only visit each one once.
- SmallPtrSet<BasicBlock *, 4> Visited;
- for (auto *BB : L->blocks())
- for (auto *SuccBB : successors(BB)) {
- // We're looking for exit blocks so skip in-loop successors.
- if (L->contains(SuccBB))
- continue;
-
- // Visit each exit block exactly once.
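As an aside, the invariant this routine establishes can be stated in isolation; a sketch under the same definitions (hypothetical helper, not part of this file):

    // An exit block is "dedicated" when every one of its predecessors lies
    // inside the loop, so no block outside the loop branches into it.
    static bool isDedicatedExitSketch(const Loop &L, BasicBlock &Exit) {
      for (BasicBlock *Pred : predecessors(&Exit))
        if (!L.contains(Pred))
          return false;
      return true;
    }

The per-exit rewriting below fires exactly when this predicate fails.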
- if (!Visited.insert(SuccBB).second)
- continue;
-
- Changed |= RewriteExit(SuccBB);
- }
-
- return Changed;
-}
-
-/// Returns the instructions that use values defined in the loop.
-SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
- SmallVector<Instruction *, 8> UsedOutside;
-
- for (auto *Block : L->getBlocks())
- // FIXME: I believe that this could use copy_if if the Inst reference could
- // be adapted into a pointer.
- for (auto &Inst : *Block) {
- auto Users = Inst.users();
- if (any_of(Users, [&](User *U) {
- auto *Use = cast<Instruction>(U);
- return !L->contains(Use->getParent());
- }))
- UsedOutside.push_back(&Inst);
- }
-
- return UsedOutside;
-}
-
-void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
- // By definition, all loop passes need the LoopInfo analysis and the
- // Dominator tree it depends on. Because they all participate in the loop
- // pass manager, they must also preserve these.
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
-
- // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
- // here because users shouldn't directly get them from this header.
- extern char &LoopSimplifyID;
- extern char &LCSSAID;
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- // This is used in the LPPassManager to perform LCSSA verification on passes
- // which preserve LCSSA form.
- AU.addRequired<LCSSAVerificationPass>();
- AU.addPreserved<LCSSAVerificationPass>();
-
- // Loop passes are designed to run inside of a loop pass manager which means
- // that any function analyses they require must be required by the first loop
- // pass in the manager (so that it is computed before the loop pass manager
- // runs) and preserved by all loop passes in the manager. To make this
- // reasonably robust, the set needed for most loop passes is maintained here.
- // If your loop pass requires an analysis not listed here, you will need to
- // carefully audit the loop pass manager nesting structure that results.
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
-}
-
-/// Manually defined generic "LoopPass" dependency initialization. This is used
-/// to initialize the exact set of passes from above in \c
-/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
-/// with:
-///
-/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
-///
-/// As-if "LoopPass" were a pass.
-void llvm::initializeLoopPassPass(PassRegistry &Registry) {
- INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
- INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
- INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-}
-
-/// Find string metadata for a loop.
-///
-/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
-/// operand or null otherwise.
If the string metadata is not found, return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or more than 2 operands");
- }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- // When the value is absent it is interpreted as 'attribute set'.
- return true;
- case 2:
- if (ConstantInt *IntMD =
- mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
- return IntMD->getZExtValue();
- return true;
- }
- llvm_unreachable("unexpected number of options");
-}
-
-static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
- return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
- StringRef Name) {
- const MDOperand *AttrMD =
- findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
- if (!AttrMD)
- return None;
-
- ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
- if (!IntMD)
- return None;
-
- return IntMD->getSExtValue();
-}
-
-Optional<MDNode *> llvm::makeFollowupLoopID(
- MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
- const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
- if (!OrigLoopID) {
- if (AlwaysNew)
- return nullptr;
- return None;
- }
-
- assert(OrigLoopID->getOperand(0) == OrigLoopID);
-
- bool InheritAllAttrs = !InheritOptionsExceptPrefix;
- bool InheritSomeAttrs =
- InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
- SmallVector<Metadata *, 8> MDs;
- MDs.push_back(nullptr);
-
- bool Changed = false;
- if (InheritAllAttrs || InheritSomeAttrs) {
- for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
- MDNode *Op = cast<MDNode>(Existing.get());
-
- auto InheritThisAttribute = [InheritSomeAttrs,
- InheritOptionsExceptPrefix](MDNode *Op) {
- if (!InheritSomeAttrs)
- return false;
-
- // Skip malformed attribute metadata nodes.
- if (Op->getNumOperands() == 0)
- return true;
- Metadata *NameMD = Op->getOperand(0).get();
- if (!isa<MDString>(NameMD))
- return true;
- StringRef AttrName = cast<MDString>(NameMD)->getString();
-
- // Do not inherit excluded attributes.
- return !AttrName.startswith(InheritOptionsExceptPrefix);
- };
-
- if (InheritThisAttribute(Op))
- MDs.push_back(Op);
- else
- Changed = true;
- }
- } else {
- // Modified if we dropped at least one attribute.
- Changed = OrigLoopID->getNumOperands() > 1;
- }
-
- bool HasAnyFollowup = false;
- for (StringRef OptionName : FollowupOptions) {
- MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
- if (!FollowupNode)
- continue;
-
- HasAnyFollowup = true;
- for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
- MDs.push_back(Option.get());
- Changed = true;
- }
- }
-
- // No followup attributes were specified explicitly, so signal to the
- // transformation pass to add suitable attributes.
- if (!AlwaysNew && !HasAnyFollowup)
- return None;
-
- // If no attributes were added or removed, the previous loop ID can be reused.
- if (!AlwaysNew && !Changed) - return OrigLoopID; - - // No attributes is equivalent to having no !llvm.loop metadata at all. - if (MDs.size() == 1) - return nullptr; - - // Build the new loop ID. - MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs); - FollowupLoopID->replaceOperandWith(0, FollowupLoopID); - return FollowupLoopID; -} - -bool llvm::hasDisableAllTransformsHint(const Loop *L) { - return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); -} - -TransformationMode llvm::hasUnrollTransformation(Loop *L) { - if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) - return TM_SuppressedByUser; - - Optional<int> Count = - getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count"); - if (Count.hasValue()) - return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; - - if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) - return TM_ForcedByUser; - - if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) - return TM_ForcedByUser; - - if (hasDisableAllTransformsHint(L)) - return TM_Disable; - - return TM_Unspecified; -} - -TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) { - if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable")) - return TM_SuppressedByUser; - - Optional<int> Count = - getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count"); - if (Count.hasValue()) - return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser; - - if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable")) - return TM_ForcedByUser; - - if (hasDisableAllTransformsHint(L)) - return TM_Disable; - - return TM_Unspecified; -} - -TransformationMode llvm::hasVectorizeTransformation(Loop *L) { - Optional<bool> Enable = - getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable"); - - if (Enable == false) - return TM_SuppressedByUser; - - Optional<int> VectorizeWidth = - getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width"); - Optional<int> InterleaveCount = - getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count"); - - // 'Forcing' vector width and interleave count to one effectively disables - // this tranformation. - if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1) - return TM_SuppressedByUser; - - if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) - return TM_Disable; - - if (Enable == true) - return TM_ForcedByUser; - - if (VectorizeWidth == 1 && InterleaveCount == 1) - return TM_Disable; - - if (VectorizeWidth > 1 || InterleaveCount > 1) - return TM_Enable; - - if (hasDisableAllTransformsHint(L)) - return TM_Disable; - - return TM_Unspecified; -} - -TransformationMode llvm::hasDistributeTransformation(Loop *L) { - if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable")) - return TM_ForcedByUser; - - if (hasDisableAllTransformsHint(L)) - return TM_Disable; - - return TM_Unspecified; -} - -TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) { - if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable")) - return TM_SuppressedByUser; - - if (hasDisableAllTransformsHint(L)) - return TM_Disable; - - return TM_Unspecified; -} - -/// Does a BFS from a given node to all of its children inside a given loop. -/// The returned vector of nodes includes the starting point. -SmallVector<DomTreeNode *, 16> -llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { - SmallVector<DomTreeNode *, 16> Worklist; - auto AddRegionToWorklist = [&](DomTreeNode *DTN) { - // Only include subregions in the top level loop. 
- BasicBlock *BB = DTN->getBlock(); - if (CurLoop->contains(BB)) - Worklist.push_back(DTN); - }; - - AddRegionToWorklist(N); - - for (size_t I = 0; I < Worklist.size(); I++) - for (DomTreeNode *Child : Worklist[I]->getChildren()) - AddRegionToWorklist(Child); - - return Worklist; -} - -void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, - ScalarEvolution *SE = nullptr, - LoopInfo *LI = nullptr) { - assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); - auto *Preheader = L->getLoopPreheader(); - assert(Preheader && "Preheader should exist!"); - - // Now that we know the removal is safe, remove the loop by changing the - // branch from the preheader to go to the single exit block. - // - // Because we're deleting a large chunk of code at once, the sequence in which - // we remove things is very important to avoid invalidation issues. - - // Tell ScalarEvolution that the loop is deleted. Do this before - // deleting the loop so that ScalarEvolution can look at the loop - // to determine what it needs to clean up. - if (SE) - SE->forgetLoop(L); - - auto *ExitBlock = L->getUniqueExitBlock(); - assert(ExitBlock && "Should have a unique exit block!"); - assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); - - auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); - assert(OldBr && "Preheader must end with a branch"); - assert(OldBr->isUnconditional() && "Preheader must have a single successor"); - // Connect the preheader to the exit block. Keep the old edge to the header - // around to perform the dominator tree update in two separate steps - // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge - // preheader -> header. - // - // - // 0. Preheader 1. Preheader 2. Preheader - // | | | | - // V | V | - // Header <--\ | Header <--\ | Header <--\ - // | | | | | | | | | | | - // | V | | | V | | | V | - // | Body --/ | | Body --/ | | Body --/ - // V V V V V - // Exit Exit Exit - // - // By doing this is two separate steps we can perform the dominator tree - // update without using the batch update API. - // - // Even when the loop is never executed, we cannot remove the edge from the - // source block to the exit block. Consider the case where the unexecuted loop - // branches back to an outer loop. If we deleted the loop and removed the edge - // coming to this inner loop, this will break the outer loop structure (by - // deleting the backedge of the outer loop). If the outer loop is indeed a - // non-loop, it will be deleted in a future iteration of loop deletion pass. - IRBuilder<> Builder(OldBr); - Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); - // Remove the old branch. The conditional branch becomes a new terminator. - OldBr->eraseFromParent(); - - // Rewrite phis in the exit block to get their inputs from the Preheader - // instead of the exiting block. - for (PHINode &P : ExitBlock->phis()) { - // Set the zero'th element of Phi to be from the preheader and remove all - // other incoming values. Given the loop has dedicated exits, all other - // incoming values must be from the exiting blocks. - int PredIndex = 0; - P.setIncomingBlock(PredIndex, Preheader); - // Removes all incoming values from all other exiting blocks (including - // duplicate values from an exiting block). - // Nuke all entries except the zero'th entry which is the preheader entry. - // NOTE! 
We need to remove Incoming Values in the reverse order as done - // below, to keep the indices valid for deletion (removeIncomingValues - // updates getNumIncomingValues and shifts all values down into the operand - // being deleted). - for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) - P.removeIncomingValue(e - i, false); - - assert((P.getNumIncomingValues() == 1 && - P.getIncomingBlock(PredIndex) == Preheader) && - "Should have exactly one value and that's from the preheader!"); - } - - // Disconnect the loop body by branching directly to its exit. - Builder.SetInsertPoint(Preheader->getTerminator()); - Builder.CreateBr(ExitBlock); - // Remove the old branch. - Preheader->getTerminator()->eraseFromParent(); - - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - if (DT) { - // Update the dominator tree by informing it about the new edge from the - // preheader to the exit and the removed edge. - DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}, - {DominatorTree::Delete, Preheader, L->getHeader()}}); - } - - // Use a map to unique and a vector to guarantee deterministic ordering. - llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet; - llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst; - - // Given LCSSA form is satisfied, we should not have users of instructions - // within the dead loop outside of the loop. However, LCSSA doesn't take - // unreachable uses into account. We handle them here. - // We could do it after drop all references (in this case all users in the - // loop will be already eliminated and we have less work to do but according - // to API doc of User::dropAllReferences only valid operation after dropping - // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. - for (auto *Block : L->blocks()) - for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); - for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { - Use &U = *UI; - ++UI; - if (auto *Usr = dyn_cast<Instruction>(U.getUser())) - if (L->contains(Usr->getParent())) - continue; - // If we have a DT then we can check that uses outside a loop only in - // unreachable block. - if (DT) - assert(!DT->isReachableFromEntry(U) && - "Unexpected user in reachable block"); - U.set(Undef); - } - auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); - if (!DVI) - continue; - auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); - if (Key != DeadDebugSet.end()) - continue; - DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); - DeadDebugInst.push_back(DVI); - } - - // After the loop has been deleted all the values defined and modified - // inside the loop are going to be unavailable. - // Since debug values in the loop have been deleted, inserting an undef - // dbg.value truncates the range of any dbg.value before the loop where the - // loop used to be. This is particularly important for constant values. 
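The reverse-order phi pruning described in the NOTE above has this minimal shape (sketch, assuming a PHINode P whose operand 0 must survive):

    // Deleting from the back keeps the indices of the surviving entries
    // stable, so operand 0 (the preheader entry) is never renumbered.
    while (P.getNumIncomingValues() > 1)
      P.removeIncomingValue(P.getNumIncomingValues() - 1,
                            /*DeletePHIIfEmpty=*/false);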
- DIBuilder DIB(*ExitBlock->getModule());
- Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
- assert(InsertDbgValueBefore &&
- "There should be a non-PHI instruction in exit block, else these "
- "instructions will have no parent.");
- for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
- DVI->getVariable(), DVI->getExpression(),
- DVI->getDebugLoc(), InsertDbgValueBefore);
-
- // Remove the blocks from the reference counting scheme, so that we can
- // delete them freely later.
- for (auto *Block : L->blocks())
- Block->dropAllReferences();
-
- if (LI) {
- // Erase the instructions and the blocks without having to worry
- // about ordering because we already dropped the references.
- // NOTE: This iteration is safe because erasing the block does not remove
- // its entry from the loop's block list. We do that in the next section.
- for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
- LpI != LpE; ++LpI)
- (*LpI)->eraseFromParent();
-
- // Finally, remove the blocks from LoopInfo. This has to happen late because
- // otherwise our loop iterators won't work.
-
- SmallPtrSet<BasicBlock *, 8> blocks;
- blocks.insert(L->block_begin(), L->block_end());
- for (BasicBlock *BB : blocks)
- LI->removeBlock(BB);
-
- // The last step is to update LoopInfo now that we've eliminated this loop.
- LI->erase(L);
- }
-}
-
-Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
- // Only support loops with an exiting latch; all other exits must end in
- // a deoptimize call.
-
- // Get the branch weights for the loop's backedge.
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
- return None;
- BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
- return None;
-
- assert((LatchBR->getSuccessor(0) == L->getHeader() ||
- LatchBR->getSuccessor(1) == L->getHeader()) &&
- "At least one edge out of the latch must go to the header");
-
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L->getUniqueNonLatchExitBlocks(ExitBlocks);
- if (any_of(ExitBlocks, [](const BasicBlock *EB) {
- return !EB->getTerminatingDeoptimizeCall();
- }))
- return None;
-
- // To estimate the number of times the loop body was executed, we want to
- // know the number of times the backedge was taken, vs. the number of times
- // we exited the loop.
- uint64_t TrueVal, FalseVal;
- if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
- return None;
-
- if (!TrueVal || !FalseVal)
- return 0;
-
- // Divide the count of the backedge by the count of the edge exiting the loop,
- // rounding to nearest.
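A worked instance of that division (hypothetical profile weights):

    // Latch branch with !prof branch_weights of 990 (taken, to the header)
    // and 10 (exit): the backedge runs about 99 times per exit, and the
    // round-to-nearest form below yields exactly that.
    uint64_t TrueVal = 990, FalseVal = 10;
    uint64_t Estimate = (TrueVal + FalseVal / 2) / FalseVal; // == 99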
- if (LatchBR->getSuccessor(0) == L->getHeader()) - return (TrueVal + (FalseVal / 2)) / FalseVal; - else - return (FalseVal + (TrueVal / 2)) / TrueVal; -} - -bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, - ScalarEvolution &SE) { - Loop *OuterL = InnerLoop->getParentLoop(); - if (!OuterL) - return true; - - // Get the backedge taken count for the inner loop - BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); - const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch); - if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) || - !InnerLoopBECountSC->getType()->isIntegerTy()) - return false; - - // Get whether count is invariant to the outer loop - ScalarEvolution::LoopDisposition LD = - SE.getLoopDisposition(InnerLoopBECountSC, OuterL); - if (LD != ScalarEvolution::LoopInvariant) - return false; - - return true; -} - -Value *llvm::createMinMaxOp(IRBuilder<> &Builder, - RecurrenceDescriptor::MinMaxRecurrenceKind RK, - Value *Left, Value *Right) { - CmpInst::Predicate P = CmpInst::ICMP_NE; - switch (RK) { - default: - llvm_unreachable("Unknown min/max recurrence kind"); - case RecurrenceDescriptor::MRK_UIntMin: - P = CmpInst::ICMP_ULT; - break; - case RecurrenceDescriptor::MRK_UIntMax: - P = CmpInst::ICMP_UGT; - break; - case RecurrenceDescriptor::MRK_SIntMin: - P = CmpInst::ICMP_SLT; - break; - case RecurrenceDescriptor::MRK_SIntMax: - P = CmpInst::ICMP_SGT; - break; - case RecurrenceDescriptor::MRK_FloatMin: - P = CmpInst::FCMP_OLT; - break; - case RecurrenceDescriptor::MRK_FloatMax: - P = CmpInst::FCMP_OGT; - break; - } - - // We only match FP sequences that are 'fast', so we can unconditionally - // set it on any generated instructions. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - FastMathFlags FMF; - FMF.setFast(); - Builder.setFastMathFlags(FMF); - - Value *Cmp; - if (RK == RecurrenceDescriptor::MRK_FloatMin || - RK == RecurrenceDescriptor::MRK_FloatMax) - Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); - else - Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); - - Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); - return Select; -} - -// Helper to generate an ordered reduction. -Value * -llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, - unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - ArrayRef<Value *> RedOps) { - unsigned VF = Src->getType()->getVectorNumElements(); - - // Extract and apply reduction ops in ascending order: - // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] - Value *Result = Acc; - for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { - Value *Ext = - Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); - - if (Op != Instruction::ICmp && Op != Instruction::FCmp) { - Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, - "bin.rdx"); - } else { - assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && - "Invalid min/max"); - Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext); - } - - if (!RedOps.empty()) - propagateIRFlags(Result, RedOps); - } - - return Result; -} - -// Helper to generate a log2 shuffle reduction. 
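Before the IR-level emission that follows, the same idea on plain scalars (sketch, assuming a power-of-two lane count):

    // Each round folds the upper half of the lanes into the lower half, so
    // VF values are reduced in log2(VF) rounds; lane 0 holds the result.
    static float reduceAddScalarModel(float *Lanes, unsigned VF) {
      for (unsigned Width = VF / 2; Width >= 1; Width /= 2)
        for (unsigned J = 0; J != Width; ++J)
          Lanes[J] += Lanes[J + Width];
      return Lanes[0];
    }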
-Value * -llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - ArrayRef<Value *> RedOps) { - unsigned VF = Src->getType()->getVectorNumElements(); - // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles - // and vector ops, reducing the set of values being computed by half each - // round. - assert(isPowerOf2_32(VF) && - "Reduction emission only supported for pow2 vectors!"); - Value *TmpVec = Src; - SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); - for (unsigned i = VF; i != 1; i >>= 1) { - // Move the upper half of the vector to the lower half. - for (unsigned j = 0; j != i / 2; ++j) - ShuffleMask[j] = Builder.getInt32(i / 2 + j); - - // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), - UndefValue::get(Builder.getInt32Ty())); - - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), - ConstantVector::get(ShuffleMask), "rdx.shuf"); - - if (Op != Instruction::ICmp && Op != Instruction::FCmp) { - // The builder propagates its fast-math-flags setting. - TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, - "bin.rdx"); - } else { - assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && - "Invalid min/max"); - TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf); - } - if (!RedOps.empty()) - propagateIRFlags(TmpVec, RedOps); - } - // The result is in the first element of the vector. - return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); -} - -/// Create a simple vector reduction specified by an opcode and some -/// flags (if generating min/max reductions). -Value *llvm::createSimpleTargetReduction( - IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, - Value *Src, TargetTransformInfo::ReductionFlags Flags, - ArrayRef<Value *> RedOps) { - assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); - - std::function<Value *()> BuildFunc; - using RD = RecurrenceDescriptor; - RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; - - switch (Opcode) { - case Instruction::Add: - BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; - break; - case Instruction::Mul: - BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; - break; - case Instruction::And: - BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; - break; - case Instruction::Or: - BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; - break; - case Instruction::Xor: - BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; - break; - case Instruction::FAdd: - BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce( - Constant::getNullValue(Src->getType()->getVectorElementType()), Src); - return Rdx; - }; - break; - case Instruction::FMul: - BuildFunc = [&]() { - Type *Ty = Src->getType()->getVectorElementType(); - auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); - return Rdx; - }; - break; - case Instruction::ICmp: - if (Flags.IsMaxOp) { - MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; - BuildFunc = [&]() { - return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); - }; - } else { - MinMaxKind = Flags.IsSigned ? 
RD::MRK_SIntMin : RD::MRK_UIntMin; - BuildFunc = [&]() { - return Builder.CreateIntMinReduce(Src, Flags.IsSigned); - }; - } - break; - case Instruction::FCmp: - if (Flags.IsMaxOp) { - MinMaxKind = RD::MRK_FloatMax; - BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; - } else { - MinMaxKind = RD::MRK_FloatMin; - BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; - } - break; - default: - llvm_unreachable("Unhandled opcode"); - break; - } - if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) - return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); -} - -/// Create a vector reduction using a given recurrence descriptor. -Value *llvm::createTargetReduction(IRBuilder<> &B, - const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src, - bool NoNaN) { - // TODO: Support in-order reductions based on the recurrence descriptor. - using RD = RecurrenceDescriptor; - RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); - TargetTransformInfo::ReductionFlags Flags; - Flags.NoNaN = NoNaN; - - // All ops in the reduction inherit fast-math-flags from the recurrence - // descriptor. - IRBuilder<>::FastMathFlagGuard FMFGuard(B); - B.setFastMathFlags(Desc.getFastMathFlags()); - - switch (RecKind) { - case RD::RK_FloatAdd: - return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); - case RD::RK_FloatMult: - return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); - case RD::RK_IntegerAdd: - return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); - case RD::RK_IntegerMult: - return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); - case RD::RK_IntegerAnd: - return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); - case RD::RK_IntegerOr: - return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); - case RD::RK_IntegerXor: - return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); - case RD::RK_IntegerMinMax: { - RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); - Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); - Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); - return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); - } - case RD::RK_FloatMinMax: { - Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; - return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); - } - default: - llvm_unreachable("Unhandled RecKind"); - } -} - -void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { - auto *VecOp = dyn_cast<Instruction>(I); - if (!VecOp) - return; - auto *Intersection = (OpValue == nullptr) ? 
dyn_cast<Instruction>(VL[0]) - : dyn_cast<Instruction>(OpValue); - if (!Intersection) - return; - const unsigned Opcode = Intersection->getOpcode(); - VecOp->copyIRFlags(Intersection); - for (auto *V : VL) { - auto *Instr = dyn_cast<Instruction>(V); - if (!Instr) - continue; - if (OpValue == nullptr || Opcode == Instr->getOpcode()) - VecOp->andIRFlags(V); - } -} - -bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L, - ScalarEvolution &SE) { - const SCEV *Zero = SE.getZero(S->getType()); - return SE.isAvailableAtLoopEntry(S, L) && - SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero); -} - -bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, - ScalarEvolution &SE) { - const SCEV *Zero = SE.getZero(S->getType()); - return SE.isAvailableAtLoopEntry(S, L) && - SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero); -} - -bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, - bool Signed) { - unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); - APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : - APInt::getMinValue(BitWidth); - auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; - return SE.isAvailableAtLoopEntry(S, L) && - SE.isLoopEntryGuardedByCond(L, Predicate, S, - SE.getConstant(Min)); -} - -bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, - bool Signed) { - unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth(); - APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : - APInt::getMaxValue(BitWidth); - auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; - return SE.isAvailableAtLoopEntry(S, L) && - SE.isLoopEntryGuardedByCond(L, Predicate, S, - SE.getConstant(Max)); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp deleted file mode 100644 index a9a480a4b7f9..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ /dev/null @@ -1,326 +0,0 @@ -//===- LoopVersioning.cpp - Utility to version a loop ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a utility class to perform loop versioning. The versioned -// loop speculates that otherwise may-aliasing memory accesses don't overlap and -// emits checks to prove this. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LoopVersioning.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" - -using namespace llvm; - -static cl::opt<bool> - AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true), - cl::Hidden, - cl::desc("Add no-alias annotation for instructions that " - "are disambiguated by memchecks")); - -LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, ScalarEvolution *SE, - bool UseLAIChecks) - : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), - SE(SE) { - assert(L->getExitBlock() && "No single exit block"); - assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form"); - if (UseLAIChecks) { - setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); - setSCEVChecks(LAI.getPSE().getUnionPredicate()); - } -} - -void LoopVersioning::setAliasChecks( - SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) { - AliasChecks = std::move(Checks); -} - -void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { - Preds = std::move(Check); -} - -void LoopVersioning::versionLoop( - const SmallVectorImpl<Instruction *> &DefsUsedOutside) { - Instruction *FirstCheckInst; - Instruction *MemRuntimeCheck; - Value *SCEVRuntimeCheck; - Value *RuntimeCheck = nullptr; - - // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); - std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); - - const SCEVUnionPredicate &Pred = LAI.getPSE().getUnionPredicate(); - SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), - "scev.check"); - SCEVRuntimeCheck = - Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); - auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); - - // Discard the SCEV runtime check if it is always true. - if (CI && CI->isZero()) - SCEVRuntimeCheck = nullptr; - - if (MemRuntimeCheck && SCEVRuntimeCheck) { - RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck, - SCEVRuntimeCheck, "lver.safe"); - if (auto *I = dyn_cast<Instruction>(RuntimeCheck)) - I->insertBefore(RuntimeCheckBB->getTerminator()); - } else - RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; - - assert(RuntimeCheck && "called even though we don't need " - "any runtime checks"); - - // Rename the block to make the IR more readable. - RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() + - ".lver.check"); - - // Create empty preheader for the loop (and after cloning for the - // non-versioned loop). - BasicBlock *PH = - SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); - - // Clone the loop including the preheader. - // - // FIXME: This does not currently preserve SimplifyLoop because the exit - // block is a join between the two loops. 
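What versionLoop ultimately builds is a runtime dispatch between two copies of the loop; in source-level terms (illustrative sketch only; A, B and N are hypothetical):

    // lver.check: the memchecks prove the two ranges cannot overlap.
    if (A + N <= B || B + N <= A) {
      for (size_t I = 0; I < N; ++I) // versioned loop: may assume no aliasing
        A[I] += B[I];
    } else {
      for (size_t I = 0; I < N; ++I) // non-versioned ".lver.orig" fallback
        A[I] += B[I];
    }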
- SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
- NonVersionedLoop =
- cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
- ".lver.orig", LI, DT, NonVersionedLoopBlocks);
- remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
-
- // Insert the conditional branch based on the result of the memchecks.
- Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
- BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
- OrigTerm->eraseFromParent();
-
- // The loops merge in the original exit block. This is now dominated by the
- // memchecking block.
- DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
-
- // Adds the necessary PHI nodes for the versioned loops based on the
- // loop-defined values used outside of the loop.
- addPHINodes(DefsUsedOutside);
-}
-
-void LoopVersioning::addPHINodes(
- const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
- assert(PHIBlock && "No single successor to loop exit block");
- PHINode *PN;
-
- // First add a single-operand PHI for each value in DefsUsedOutside if one
- // does not exist yet.
- for (auto *Inst : DefsUsedOutside) {
- // See if we have a single-operand PHI with the value defined by the
- // original loop.
- for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- if (PN->getIncomingValue(0) == Inst)
- break;
- }
- // If not, create it.
- if (!PN) {
- PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
- &PHIBlock->front());
- SmallVector<User*, 8> UsersToUpdate;
- for (User *U : Inst->users())
- if (!VersionedLoop->contains(cast<Instruction>(U)->getParent()))
- UsersToUpdate.push_back(U);
- for (User *U : UsersToUpdate)
- U->replaceUsesOfWith(Inst, PN);
- PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
- }
- }
-
- // Then for each PHI add the operand for the edge from the cloned loop.
- for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
- assert(PN->getNumOperands() == 1 &&
- "Exit block should only have one predecessor");
-
- // If the definition was cloned, use that; otherwise use the original value.
- Value *ClonedValue = PN->getIncomingValue(0);
- auto Mapped = VMap.find(ClonedValue);
- if (Mapped != VMap.end())
- ClonedValue = Mapped->second;
-
- PN->addIncoming(ClonedValue, NonVersionedLoop->getExitingBlock());
- }
-}
-
-void LoopVersioning::prepareNoAliasMetadata() {
- // We need to turn the no-alias relation between pointer checking groups into
- // no-aliasing annotations between instructions.
- //
- // We accomplish this by mapping each pointer checking group (a set of
- // pointers memchecked together) to an alias scope and then also mapping each
- // group to the list of scopes it can't alias.
-
- const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
- LLVMContext &Context = VersionedLoop->getHeader()->getContext();
-
- // First allocate an aliasing scope for each pointer checking group.
- //
- // While traversing through the checking groups in the loop, also create a
- // reverse map from pointers to the pointer checking group they were assigned
- // to.
- MDBuilder MDB(Context); - MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain"); - - for (const auto &Group : RtPtrChecking->CheckingGroups) { - GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain); - - for (unsigned PtrIdx : Group.Members) - PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group; - } - - // Go through the checks and for each pointer group, collect the scopes for - // each non-aliasing pointer group. - DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, - SmallVector<Metadata *, 4>> - GroupToNonAliasingScopes; - - for (const auto &Check : AliasChecks) - GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]); - - // Finally, transform the above to actually map to scope list which is what - // the metadata uses. - - for (auto Pair : GroupToNonAliasingScopes) - GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second); -} - -void LoopVersioning::annotateLoopWithNoAlias() { - if (!AnnotateNoAlias) - return; - - // First prepare the maps. - prepareNoAliasMetadata(); - - // Add the scope and no-alias metadata to the instructions. - for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) { - annotateInstWithNoAlias(I); - } -} - -void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, - const Instruction *OrigInst) { - if (!AnnotateNoAlias) - return; - - LLVMContext &Context = VersionedLoop->getHeader()->getContext(); - const Value *Ptr = isa<LoadInst>(OrigInst) - ? cast<LoadInst>(OrigInst)->getPointerOperand() - : cast<StoreInst>(OrigInst)->getPointerOperand(); - - // Find the group for the pointer and then add the scope metadata. - auto Group = PtrToGroup.find(Ptr); - if (Group != PtrToGroup.end()) { - VersionedInst->setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate( - VersionedInst->getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Context, GroupToScope[Group->second]))); - - // Add the no-alias metadata. - auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second); - if (NonAliasingScopeList != GroupToNonAliasingScopeList.end()) - VersionedInst->setMetadata( - LLVMContext::MD_noalias, - MDNode::concatenate( - VersionedInst->getMetadata(LLVMContext::MD_noalias), - NonAliasingScopeList->second)); - } -} - -namespace { -/// Also expose this is a pass. Currently this is only used for -/// unit-testing. It adds all memchecks necessary to remove all may-aliasing -/// array accesses from the loop. -class LoopVersioningPass : public FunctionPass { -public: - LoopVersioningPass() : FunctionPass(ID) { - initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - - // Build up a worklist of inner-loops to version. This is necessary as the - // act of versioning a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->empty()) - Worklist.push_back(L); - - // Now walk the identified inner loops. 
- bool Changed = false; - for (Loop *L : Worklist) { - const LoopAccessInfo &LAI = LAA->getInfo(L); - if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() && - (LAI.getNumRuntimePointerChecks() || - !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { - LoopVersioning LVer(LAI, L, LI, DT, SE); - LVer.versionLoop(); - LVer.annotateLoopWithNoAlias(); - Changed = true; - } - } - - return Changed; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<LoopAccessLegacyAnalysis>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - } - - static char ID; -}; -} - -#define LVER_OPTION "loop-versioning" -#define DEBUG_TYPE LVER_OPTION - -char LoopVersioningPass::ID; -static const char LVer_name[] = "Loop Versioning"; - -INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) - -namespace llvm { -FunctionPass *createLoopVersioningPass() { - return new LoopVersioningPass(); -} -} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp deleted file mode 100644 index fe67e191dc62..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This transformation is designed for use by code generators which do not yet -// support stack unwinding. This pass converts 'invoke' instructions to 'call' -// instructions, so that any exception-handling 'landingpad' blocks become dead -// code (which can be removed by running the '-simplifycfg' pass afterwards). 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LowerInvoke.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils.h" -using namespace llvm; - -#define DEBUG_TYPE "lowerinvoke" - -STATISTIC(NumInvokes, "Number of invokes replaced"); - -namespace { - class LowerInvokeLegacyPass : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid - explicit LowerInvokeLegacyPass() : FunctionPass(ID) { - initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - }; -} - -char LowerInvokeLegacyPass::ID = 0; -INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke", - "Lower invoke and unwind, for unwindless code generators", - false, false) - -static bool runImpl(Function &F) { - bool Changed = false; - for (BasicBlock &BB : F) - if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { - SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end()); - SmallVector<OperandBundleDef, 1> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - // Insert a normal call instruction... - CallInst *NewCall = - CallInst::Create(II->getFunctionType(), II->getCalledValue(), - CallArgs, OpBundles, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Insert an unconditional branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); - - // Remove any PHI node entries from the exception destination. - II->getUnwindDest()->removePredecessor(&BB); - - // Remove the invoke instruction now. - BB.getInstList().erase(II); - - ++NumInvokes; - Changed = true; - } - return Changed; -} - -bool LowerInvokeLegacyPass::runOnFunction(Function &F) { - return runImpl(F); -} - -namespace llvm { -char &LowerInvokePassID = LowerInvokeLegacyPass::ID; - -// Public Interface To the LowerInvoke pass. -FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); } - -PreservedAnalyses LowerInvokePass::run(Function &F, - FunctionAnalysisManager &AM) { - bool Changed = runImpl(F); - if (!Changed) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} -} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp deleted file mode 100644 index 0cc085dc366c..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ /dev/null @@ -1,451 +0,0 @@ -//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -static unsigned getLoopOperandSizeInBytes(Type *Type) { - if (VectorType *VTy = dyn_cast<VectorType>(Type)) { - return VTy->getBitWidth() / 8; - } - - return Type->getPrimitiveSizeInBits() / 8; -} - -void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, ConstantInt *CopyLen, - unsigned SrcAlign, unsigned DestAlign, - bool SrcIsVolatile, bool DstIsVolatile, - const TargetTransformInfo &TTI) { - // No need to expand zero length copies. - if (CopyLen->isZero()) - return; - - BasicBlock *PreLoopBB = InsertBefore->getParent(); - BasicBlock *PostLoopBB = nullptr; - Function *ParentFunc = PreLoopBB->getParent(); - LLVMContext &Ctx = PreLoopBB->getContext(); - - Type *TypeOfCopyLen = CopyLen->getType(); - Type *LoopOpType = - TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); - - unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); - uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; - - unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); - unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); - - if (LoopEndCount != 0) { - // Split - PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); - BasicBlock *LoopBB = - BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); - PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); - - IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); - - // Cast the Src and Dst pointers to pointers to the loop operand type (if - // needed). - PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); - PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); - if (SrcAddr->getType() != SrcOpType) { - SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); - } - if (DstAddr->getType() != DstOpType) { - DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); - } - - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); - LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); - // Loop Body - Value *SrcGEP = - LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); - Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile); - Value *DstGEP = - LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); - LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); - - Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); - LoopIndex->addIncoming(NewIndex, LoopBB); - - // Create the loop branch condition. - Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); - LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), - LoopBB, PostLoopBB); - } - - uint64_t BytesCopied = LoopEndCount * LoopOpSize; - uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; - if (RemainingBytes) { - IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() - : InsertBefore); - - // Update the alignment based on the copy size used in the loop body. 
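The byte accounting in this known-size path is easiest to see on a concrete case (hypothetical 17-byte copy with a 4-byte loop operand):

    // LoopOpSize = 4: the main loop runs 17 / 4 = 4 iterations and copies
    // 16 bytes; the single remaining byte is handled by the residual
    // loads/stores whose types TTI picks below.
    uint64_t CopyLen = 17, LoopOpSize = 4;
    uint64_t LoopEndCount   = CopyLen / LoopOpSize;      // 4
    uint64_t BytesCopied    = LoopEndCount * LoopOpSize; // 16
    uint64_t RemainingBytes = CopyLen - BytesCopied;     // 1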
-    SrcAlign = std::min(SrcAlign, LoopOpSize);
-    DestAlign = std::min(DestAlign, LoopOpSize);
-
-    SmallVector<Type *, 5> RemainingOps;
-    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
-                                          SrcAlign, DestAlign);
-
-    for (auto OpTy : RemainingOps) {
-      // Calculate the new index.
-      unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
-      uint64_t GepIndex = BytesCopied / OperandSize;
-      assert(GepIndex * OperandSize == BytesCopied &&
-             "Division should have no Remainder!");
-      // Cast source to operand type and load.
-      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
-      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
-                             ? SrcAddr
-                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
-      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
-          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
-
-      // Cast destination to operand type and store.
-      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
-      Value *CastedDst = DstAddr->getType() == DstPtrType
-                             ? DstAddr
-                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
-      Value *DstGEP = RBuilder.CreateInBoundsGEP(
-          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
-      BytesCopied += OperandSize;
-    }
-  }
-  assert(BytesCopied == CopyLen->getZExtValue() &&
-         "Bytes copied should match size in the call!");
-}
-
-void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
-                                       Value *SrcAddr, Value *DstAddr,
-                                       Value *CopyLen, unsigned SrcAlign,
-                                       unsigned DestAlign, bool SrcIsVolatile,
-                                       bool DstIsVolatile,
-                                       const TargetTransformInfo &TTI) {
-  BasicBlock *PreLoopBB = InsertBefore->getParent();
-  BasicBlock *PostLoopBB =
-      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
-
-  Function *ParentFunc = PreLoopBB->getParent();
-  LLVMContext &Ctx = PreLoopBB->getContext();
-
-  Type *LoopOpType =
-      TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
-  unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
-
-  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
-
-  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
-  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
-  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
-  if (SrcAddr->getType() != SrcOpType) {
-    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
-  }
-  if (DstAddr->getType() != DstOpType) {
-    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
-  }
-
-  // Calculate the loop trip count, and remaining bytes to copy after the loop.
-  Type *CopyLenType = CopyLen->getType();
-  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
-  assert(ILengthType &&
-         "expected size argument to memcpy to be an integer type!");
-  Type *Int8Type = Type::getInt8Ty(Ctx);
-  bool LoopOpIsInt8 = LoopOpType == Int8Type;
-  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
-  Value *RuntimeLoopCount = LoopOpIsInt8 ?
-                            CopyLen :
-                            PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
-  BasicBlock *LoopBB =
-      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
-  IRBuilder<> LoopBuilder(LoopBB);
-
-  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
-  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
-
-  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-  Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
-  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
-  LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
-  Value *NewIndex =
-      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
-  LoopIndex->addIncoming(NewIndex, LoopBB);
-
-  if (!LoopOpIsInt8) {
-    // Compute the number of bytes the main loop copies and the residual byte
-    // count that is left for the byte-wise loop below.
-    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
-    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
-
-    // Loop body for the residual copy.
-    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
-                                               PreLoopBB->getParent(),
-                                               PostLoopBB);
-    // Residual loop header.
-    BasicBlock *ResHeaderBB = BasicBlock::Create(
-        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
-
-    // Need to update the pre-loop basic block to branch to the correct place:
-    // branch to the main loop if the count is non-zero, branch to the residual
-    // loop if the copy size is smaller than one iteration of the main loop but
-    // non-zero, and finally branch to after the residual loop if the memcpy
-    // size is zero.
-    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
-    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
-                           LoopBB, ResHeaderBB);
-    PreLoopBB->getTerminator()->eraseFromParent();
-
-    LoopBuilder.CreateCondBr(
-        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
-        ResHeaderBB);
-
-    // Determine if we need to branch to the residual loop or bypass it.
-    IRBuilder<> RHBuilder(ResHeaderBB);
-    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
-                           ResLoopBB, PostLoopBB);
-
-    // Copy the residual with a single-byte load/store loop.
-    IRBuilder<> ResBuilder(ResLoopBB);
-    PHINode *ResidualIndex =
-        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
-    ResidualIndex->addIncoming(Zero, ResHeaderBB);
-
-    Value *SrcAsInt8 =
-        ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
-    Value *DstAsInt8 =
-        ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
-    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
-    Value *SrcGEP =
-        ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
-    Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
-    Value *DstGEP =
-        ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
-    ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
-
-    Value *ResNewIndex =
-        ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
-    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
-
-    // Create the loop branch condition.
-    ResBuilder.CreateCondBr(
-        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
-        PostLoopBB);
-  } else {
-    // In this case the loop operand type was a byte, and there is no need for
-    // a residual loop to copy the remaining memory after the main loop.
-    // We do, however, need to patch up the control flow by creating the
-    // terminators for the preloop block and the memcpy loop.
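-    // The patched-up control flow then looks like this (sketch; in the
-    // byte-wise case RuntimeLoopCount is simply CopyLen):
-    //
-    //   pre-loop: br (CopyLen != 0), loop, post-loop
-    //   loop:     copy one byte; br (NewIndex < CopyLen), loop, post-loop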
-    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
-    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
-                           LoopBB, PostLoopBB);
-    PreLoopBB->getTerminator()->eraseFromParent();
-    LoopBuilder.CreateCondBr(
-        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
-        PostLoopBB);
-  }
-}
-
-// Lower memmove to IR. memmove is required to correctly copy overlapping
-// memory regions; therefore, it has to check the relative positions of the
-// source and destination pointers and choose the copy direction accordingly.
-//
-// The code below is an IR rendition of this C function:
-//
-// void* memmove(void* dst, const void* src, size_t n) {
-//   unsigned char* d = dst;
-//   const unsigned char* s = src;
-//   if (s < d) {
-//     // copy backwards
-//     while (n--) {
-//       d[n] = s[n];
-//     }
-//   } else {
-//     // copy forward
-//     for (size_t i = 0; i < n; ++i) {
-//       d[i] = s[i];
-//     }
-//   }
-//   return dst;
-// }
-static void createMemMoveLoop(Instruction *InsertBefore,
-                              Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-                              unsigned SrcAlign, unsigned DestAlign,
-                              bool SrcIsVolatile, bool DstIsVolatile) {
-  Type *TypeOfCopyLen = CopyLen->getType();
-  BasicBlock *OrigBB = InsertBefore->getParent();
-  Function *F = OrigBB->getParent();
-
-  Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
-
-  // Create a comparison of src and dst, based on which we jump to either
-  // the forward-copy part of the function (if src >= dst) or the
-  // backwards-copy part (if src < dst).
-  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
-  // structure. Its block terminators (unconditional branches) are replaced by
-  // the appropriate conditional branches when the loop is built.
-  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
-                                      SrcAddr, DstAddr, "compare_src_dst");
-  Instruction *ThenTerm, *ElseTerm;
-  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
-                                &ElseTerm);
-
-  // Each part of the function consists of two blocks:
-  //   copy_backwards:      used to skip the loop when n == 0
-  //   copy_backwards_loop: the actual backwards loop BB
-  //   copy_forward:        used to skip the loop when n == 0
-  //   copy_forward_loop:   the actual forward loop BB
-  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
-  CopyBackwardsBB->setName("copy_backwards");
-  BasicBlock *CopyForwardBB = ElseTerm->getParent();
-  CopyForwardBB->setName("copy_forward");
-  BasicBlock *ExitBB = InsertBefore->getParent();
-  ExitBB->setName("memmove_done");
-
-  // Initial comparison of n == 0 that lets us skip the loops altogether.
-  // Shared between both backwards and forward copy clauses.
-  ICmpInst *CompareN =
-      new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
-                   ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
-
-  // Copying backwards.
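-  // The backwards loop decrements the index before each access, so it visits
-  // bytes n-1, n-2, ..., 0. Starting from the highest address is what makes
-  // the copy safe when the destination overlaps the tail of the source.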
- BasicBlock *LoopBB = - BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); - Value *IndexPtr = LoopBuilder.CreateSub( - LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); - Value *Element = LoopBuilder.CreateLoad( - EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr), - "element"); - LoopBuilder.CreateStore( - Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr)); - LoopBuilder.CreateCondBr( - LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), - ExitBB, LoopBB); - LoopPhi->addIncoming(IndexPtr, LoopBB); - LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); - BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); - ThenTerm->eraseFromParent(); - - // Copying forward. - BasicBlock *FwdLoopBB = - BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); - IRBuilder<> FwdLoopBuilder(FwdLoopBB); - PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); - Value *FwdElement = FwdLoopBuilder.CreateLoad( - EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi), - "element"); - FwdLoopBuilder.CreateStore( - FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi)); - Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( - FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); - FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), - ExitBB, FwdLoopBB); - FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); - FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); - - BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); - ElseTerm->eraseFromParent(); -} - -static void createMemSetLoop(Instruction *InsertBefore, - Value *DstAddr, Value *CopyLen, Value *SetValue, - unsigned Align, bool IsVolatile) { - Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *OrigBB = InsertBefore->getParent(); - Function *F = OrigBB->getParent(); - BasicBlock *NewBB = - OrigBB->splitBasicBlock(InsertBefore, "split"); - BasicBlock *LoopBB - = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); - - IRBuilder<> Builder(OrigBB->getTerminator()); - - // Cast pointer to the type of value getting stored - unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); - DstAddr = Builder.CreateBitCast(DstAddr, - PointerType::get(SetValue->getType(), dstAS)); - - Builder.CreateCondBr( - Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, - LoopBB); - OrigBB->getTerminator()->eraseFromParent(); - - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); - LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - - LoopBuilder.CreateStore( - SetValue, - LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), - IsVolatile); - - Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); - LoopIndex->addIncoming(NewIndex, LoopBB); - - LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, - NewBB); -} - -void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, - const TargetTransformInfo &TTI) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { - createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, - /* SrcAddr */ Memcpy->getRawSource(), - /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ CI, - /* SrcAlign */ Memcpy->getSourceAlignment(), - /* DestAlign */ 
Memcpy->getDestAlignment(),
-                              /* SrcIsVolatile */ Memcpy->isVolatile(),
-                              /* DstIsVolatile */ Memcpy->isVolatile(),
-                              /* TargetTransformInfo */ TTI);
-  } else {
-    createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
-                                /* SrcAddr */ Memcpy->getRawSource(),
-                                /* DstAddr */ Memcpy->getRawDest(),
-                                /* CopyLen */ Memcpy->getLength(),
-                                /* SrcAlign */ Memcpy->getSourceAlignment(),
-                                /* DestAlign */ Memcpy->getDestAlignment(),
-                                /* SrcIsVolatile */ Memcpy->isVolatile(),
-                                /* DstIsVolatile */ Memcpy->isVolatile(),
-                                /* TargetTransformInfo */ TTI);
-  }
-}
-
-void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
-  createMemMoveLoop(/* InsertBefore */ Memmove,
-                    /* SrcAddr */ Memmove->getRawSource(),
-                    /* DstAddr */ Memmove->getRawDest(),
-                    /* CopyLen */ Memmove->getLength(),
-                    /* SrcAlign */ Memmove->getSourceAlignment(),
-                    /* DestAlign */ Memmove->getDestAlignment(),
-                    /* SrcIsVolatile */ Memmove->isVolatile(),
-                    /* DstIsVolatile */ Memmove->isVolatile());
-}
-
-void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
-  createMemSetLoop(/* InsertBefore */ Memset,
-                   /* DstAddr */ Memset->getRawDest(),
-                   /* CopyLen */ Memset->getLength(),
-                   /* SetValue */ Memset->getValue(),
-                   /* Alignment */ Memset->getDestAlignment(),
-                   Memset->isVolatile());
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
deleted file mode 100644
index 8256e3b5f5af..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ /dev/null
@@ -1,618 +0,0 @@
-//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The LowerSwitch transformation rewrites switch instructions with a sequence
-// of branches, which allows targets to get away with not implementing the
-// switch instruction until it is convenient.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-#include <limits>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "lower-switch"
-
-namespace {
-
-  struct IntRange {
-    int64_t Low, High;
-  };
-
-} // end anonymous namespace
-
-// Return true iff R is covered by Ranges.
-static bool IsInRanges(const IntRange &R,
-                       const std::vector<IntRange> &Ranges) {
-  // Note: Ranges must be sorted, non-overlapping and non-adjacent.
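-  // For example, with Ranges == {[0, 3], [10, 20]}, R == [12, 15] is covered,
-  // while R == [4, 9] and R == [8, 12] are not.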
- - // Find the first range whose High field is >= R.High, - // then check if the Low field is <= R.Low. If so, we - // have a Range that covers R. - auto I = llvm::lower_bound( - Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; }); - return I != Ranges.end() && I->Low <= R.Low; -} - -namespace { - - /// Replace all SwitchInst instructions with chained branch instructions. - class LowerSwitch : public FunctionPass { - public: - // Pass identification, replacement for typeid - static char ID; - - LowerSwitch() : FunctionPass(ID) { - initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<LazyValueInfoWrapperPass>(); - } - - struct CaseRange { - ConstantInt* Low; - ConstantInt* High; - BasicBlock* BB; - - CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) - : Low(low), High(high), BB(bb) {} - }; - - using CaseVector = std::vector<CaseRange>; - using CaseItr = std::vector<CaseRange>::iterator; - - private: - void processSwitchInst(SwitchInst *SI, - SmallPtrSetImpl<BasicBlock *> &DeleteList, - AssumptionCache *AC, LazyValueInfo *LVI); - - BasicBlock *switchConvert(CaseItr Begin, CaseItr End, - ConstantInt *LowerBound, ConstantInt *UpperBound, - Value *Val, BasicBlock *Predecessor, - BasicBlock *OrigBlock, BasicBlock *Default, - const std::vector<IntRange> &UnreachableRanges); - BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, - ConstantInt *LowerBound, ConstantInt *UpperBound, - BasicBlock *OrigBlock, BasicBlock *Default); - unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); - }; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - -} // end anonymous namespace - -char LowerSwitch::ID = 0; - -// Publicly exposed interface to pass... -char &llvm::LowerSwitchID = LowerSwitch::ID; - -INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) -INITIALIZE_PASS_END(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) - -// createLowerSwitchPass - Interface to this file... -FunctionPass *llvm::createLowerSwitchPass() { - return new LowerSwitch(); -} - -bool LowerSwitch::runOnFunction(Function &F) { - LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); - AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; - // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not - // preserve it and it becomes stale (when available) pretty much immediately. - // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI - // and computeKnownBits to refine isValidAssumeForContext's results. Given - // that the latter can handle some of the simple cases w/o a DominatorTree, - // it's easier to refrain from using the tree than to keep it up to date. 
-  LVI->disableDT();
-
-  bool Changed = false;
-  SmallPtrSet<BasicBlock*, 8> DeleteList;
-
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
-    BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
-
-    // If the block is a dead Default block that will be deleted later, don't
-    // waste time processing it.
-    if (DeleteList.count(Cur))
-      continue;
-
-    if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
-      Changed = true;
-      processSwitchInst(SI, DeleteList, AC, LVI);
-    }
-  }
-
-  for (BasicBlock* BB: DeleteList) {
-    LVI->eraseBlock(BB);
-    DeleteDeadBlock(BB);
-  }
-
-  return Changed;
-}
-
-/// Used for debugging purposes.
-LLVM_ATTRIBUTE_USED
-static raw_ostream &operator<<(raw_ostream &O,
-                               const LowerSwitch::CaseVector &C) {
-  O << "[";
-
-  for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
-       B != E;) {
-    O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
-    if (++B != E)
-      O << ", ";
-  }
-
-  return O << "]";
-}
-
-/// Update the first occurrence of the "switch statement" BB in the PHI
-/// node with the "new" BB. The other occurrences will:
-///
-/// 1) Be updated by subsequent calls to this function. Switch statements may
-/// have more than one outgoing edge into the same BB if they all have the same
-/// value. When the switch statement is converted these incoming edges are now
-/// coming from multiple BBs.
-/// 2) Be removed if subsequent incoming values now share the same case, i.e.,
-/// multiple outgoing edges are condensed into one. This is necessary to keep
-/// the number of phi values equal to the number of branches to SuccBB.
-static void
-fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
-        const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
-  for (BasicBlock::iterator I = SuccBB->begin(),
-                            IE = SuccBB->getFirstNonPHI()->getIterator();
-       I != IE; ++I) {
-    PHINode *PN = cast<PHINode>(I);
-
-    // Only update the first occurrence.
-    unsigned Idx = 0, E = PN->getNumIncomingValues();
-    unsigned LocalNumMergedCases = NumMergedCases;
-    for (; Idx != E; ++Idx) {
-      if (PN->getIncomingBlock(Idx) == OrigBB) {
-        PN->setIncomingBlock(Idx, NewBB);
-        break;
-      }
-    }
-
-    // Remove additional occurrences coming from condensed cases and keep the
-    // number of incoming values equal to the number of branches to SuccBB.
-    SmallVector<unsigned, 8> Indices;
-    for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
-      if (PN->getIncomingBlock(Idx) == OrigBB) {
-        Indices.push_back(Idx);
-        LocalNumMergedCases--;
-      }
-    // Remove incoming values in reverse order to avoid invalidating the
-    // indices that are still to be visited.
-    for (unsigned III : llvm::reverse(Indices))
-      PN->removeIncomingValue(III);
-  }
-}
-
-/// Convert the switch statement into a binary lookup of the case values.
-/// The function recursively builds this tree. LowerBound and UpperBound are
-/// used to keep track of the bounds for Val that have already been checked by
-/// a block emitted by one of the previous calls to switchConvert in the call
-/// stack.
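-///
-/// For example, the clustered cases {[1,2], [5,5], [7,9]} are split around
-/// the middle cluster: the emitted node compares Val against 5 and branches
-/// to a subtree for {[1,2]} on the left and one for {[5,5], [7,9]} on the
-/// right, so the lookup cost is logarithmic in the number of clusters.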
-BasicBlock * -LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, BasicBlock *OrigBlock, - BasicBlock *Default, - const std::vector<IntRange> &UnreachableRanges) { - assert(LowerBound && UpperBound && "Bounds must be initialized"); - unsigned Size = End - Begin; - - if (Size == 1) { - // Check if the Case Range is perfectly squeezed in between - // already checked Upper and Lower bounds. If it is then we can avoid - // emitting the code that checks if the value actually falls in the range - // because the bounds already tell us so. - if (Begin->Low == LowerBound && Begin->High == UpperBound) { - unsigned NumMergedCases = 0; - NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue(); - fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); - return Begin->BB; - } - return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, - Default); - } - - unsigned Mid = Size / 2; - std::vector<CaseRange> LHS(Begin, Begin + Mid); - LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n"); - std::vector<CaseRange> RHS(Begin + Mid, End); - LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n"); - - CaseRange &Pivot = *(Begin + Mid); - LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", " - << Pivot.High->getValue() << "]\n"); - - // NewLowerBound here should never be the integer minimal value. - // This is because it is computed from a case range that is never - // the smallest, so there is always a case range that has at least - // a smaller value. - ConstantInt *NewLowerBound = Pivot.Low; - - // Because NewLowerBound is never the smallest representable integer - // it is safe here to subtract one. - ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), - NewLowerBound->getValue() - 1); - - if (!UnreachableRanges.empty()) { - // Check if the gap between LHS's highest and NewLowerBound is unreachable. - int64_t GapLow = LHS.back().High->getSExtValue() + 1; - int64_t GapHigh = NewLowerBound->getSExtValue() - 1; - IntRange Gap = { GapLow, GapHigh }; - if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges)) - NewUpperBound = LHS.back().High; - } - - LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", " - << NewUpperBound->getSExtValue() << "]\n" - << "RHS Bounds ==> [" << NewLowerBound->getSExtValue() - << ", " << UpperBound->getSExtValue() << "]\n"); - - // Create a new node that checks if the value is < pivot. Go to the - // left branch if it is and right branch if not. - Function* F = OrigBlock->getParent(); - BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); - - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, - Val, Pivot.Low, "Pivot"); - - BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, - NewUpperBound, Val, NewNode, OrigBlock, - Default, UnreachableRanges); - BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, - UpperBound, Val, NewNode, OrigBlock, - Default, UnreachableRanges); - - F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode); - NewNode->getInstList().push_back(Comp); - - BranchInst::Create(LBranch, RBranch, Comp, NewNode); - return NewNode; -} - -/// Create a new leaf block for the binary lookup tree. It checks if the -/// switch's value == the case's value. If not, then it jumps to the default -/// branch. At this point in the tree, the value can't be another valid case -/// value, so the jump to the "default" branch is warranted. 
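-///
-/// A range [Lo, Hi] that is not anchored at an already-checked bound is
-/// tested with a single unsigned comparison: Val - Lo <=u Hi - Lo. For
-/// example, for the range [5, 10] the values 5..10 map to 0..5, while
-/// everything else wraps around to a value above 5 and fails the test.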
-BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
-                                      ConstantInt *LowerBound,
-                                      ConstantInt *UpperBound,
-                                      BasicBlock *OrigBlock,
-                                      BasicBlock *Default) {
-  Function* F = OrigBlock->getParent();
-  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
-  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
-
-  // Emit the comparison.
-  ICmpInst* Comp = nullptr;
-  if (Leaf.Low == Leaf.High) {
-    // Make the seteq instruction...
-    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
-                        Leaf.Low, "SwitchLeaf");
-  } else {
-    // Make range comparison.
-    if (Leaf.Low == LowerBound) {
-      // Val >= Min && Val <= Hi --> Val <= Hi
-      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
-                          "SwitchLeaf");
-    } else if (Leaf.High == UpperBound) {
-      // Val <= Max && Val >= Lo --> Val >= Lo
-      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
-                          "SwitchLeaf");
-    } else if (Leaf.Low->isZero()) {
-      // Val >= 0 && Val <= Hi --> Val <=u Hi
-      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
-                          "SwitchLeaf");
-    } else {
-      // Emit V-Lo <=u Hi-Lo
-      Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
-      Instruction* Add = BinaryOperator::CreateAdd(Val, NegLo,
-                                                   Val->getName()+".off",
-                                                   NewLeaf);
-      Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
-      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound,
-                          "SwitchLeaf");
-    }
-  }
-
-  // Make the conditional branch...
-  BasicBlock* Succ = Leaf.BB;
-  BranchInst::Create(Succ, Default, Comp, NewLeaf);
-
-  // If there were any PHI nodes in this successor, rewrite one entry
-  // from OrigBlock to come from NewLeaf.
-  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
-    PHINode* PN = cast<PHINode>(I);
-    // Remove all but one of the incoming entries from the cluster.
-    uint64_t Range = Leaf.High->getSExtValue() -
-                     Leaf.Low->getSExtValue();
-    for (uint64_t j = 0; j < Range; ++j) {
-      PN->removeIncomingValue(OrigBlock);
-    }
-
-    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
-    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
-    PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
-  }
-
-  return NewLeaf;
-}
-
-/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
-/// \post \p Cases will not contain references to \p SI's default BB.
-/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
-unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
-  unsigned NumSimpleCases = 0;
-
-  // Start with "simple" cases.
-  for (auto Case : SI->cases()) {
-    if (Case.getCaseSuccessor() == SI->getDefaultDest())
-      continue;
-    Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
-                              Case.getCaseSuccessor()));
-    ++NumSimpleCases;
-  }
-
-  llvm::sort(Cases, CaseCmp());
-
-  // Merge cases into clusters.
-  if (Cases.size() >= 2) {
-    CaseItr I = Cases.begin();
-    for (CaseItr J = std::next(I), E = Cases.end(); J != E; ++J) {
-      int64_t nextValue = J->Low->getSExtValue();
-      int64_t currentValue = I->High->getSExtValue();
-      BasicBlock* nextBB = J->BB;
-      BasicBlock* currentBB = I->BB;
-
-      // If the two neighboring cases go to the same destination, merge them
-      // into a single case.
-      assert(nextValue > currentValue && "Cases should be strictly ascending");
-      if ((nextValue == currentValue + 1) && (currentBB == nextBB)) {
-        I->High = J->High;
-        // FIXME: Combine branch weights.
-      } else if (++I != J) {
-        *I = *J;
-      }
-    }
-    Cases.erase(std::next(I), Cases.end());
-  }
-
-  return NumSimpleCases;
-}
-
-/// Replace the specified switch instruction with a sequence of chained if-then
-/// insts in a balanced binary search.
-void LowerSwitch::processSwitchInst(SwitchInst *SI,
-                                    SmallPtrSetImpl<BasicBlock *> &DeleteList,
-                                    AssumptionCache *AC, LazyValueInfo *LVI) {
-  BasicBlock *OrigBlock = SI->getParent();
-  Function *F = OrigBlock->getParent();
-  Value *Val = SI->getCondition();  // The value we are switching on...
-  BasicBlock* Default = SI->getDefaultDest();
-
-  // Don't handle unreachable blocks. If there are successors with phis, this
-  // would leave them behind with missing predecessors.
-  if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
-      OrigBlock->getSinglePredecessor() == OrigBlock) {
-    DeleteList.insert(OrigBlock);
-    return;
-  }
-
-  // Prepare cases vector.
-  CaseVector Cases;
-  const unsigned NumSimpleCases = Clusterify(Cases, SI);
-  LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
-                    << ". Total non-default cases: " << NumSimpleCases
-                    << "\nCase clusters: " << Cases << "\n");
-
-  // If there is only the default destination, just branch.
-  if (Cases.empty()) {
-    BranchInst::Create(Default, OrigBlock);
-    // Remove all the references from Default's PHIs to OrigBlock, but one.
-    fixPhis(Default, OrigBlock, OrigBlock);
-    SI->eraseFromParent();
-    return;
-  }
-
-  ConstantInt *LowerBound = nullptr;
-  ConstantInt *UpperBound = nullptr;
-  bool DefaultIsUnreachableFromSwitch = false;
-
-  if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
-    // Make the bounds tightly fitted around the case value range, because we
-    // know that the value passed to the switch must be exactly one of the case
-    // values.
-    LowerBound = Cases.front().Low;
-    UpperBound = Cases.back().High;
-    DefaultIsUnreachableFromSwitch = true;
-  } else {
-    // Constraining the range of the value being switched over helps eliminate
-    // unreachable BBs and minimize the number of `add` instructions
-    // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after
-    // LowerSwitch isn't as good, and is also much more expensive in terms of
-    // compile time, for the following reasons:
-    // 1. it processes many kinds of instructions, not just switches;
-    // 2. even if limited to icmp instructions only, it will have to process
-    //    roughly C icmp's per switch, where C is the number of cases in the
-    //    switch, while LowerSwitch only needs to call LVI once per switch.
-    const DataLayout &DL = F->getParent()->getDataLayout();
-    KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
-    // TODO: Shouldn't this create a signed range?
-    ConstantRange KnownBitsRange =
-        ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
-    const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
-    ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
-    // We delegate removal of unreachable non-default cases to other passes. In
-    // the unlikely event that some of them survived, we just conservatively
-    // maintain the invariant that all the cases lie between the bounds. This
-    // may, however, still render the default case effectively unreachable.
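-    // For example, if LVI proves that Val is always in [0, 3] and the switch
-    // has exactly the cases 0, 1, 2 and 3, then Min == 0 and Max == 3 below,
-    // Min + (NumSimpleCases - 1) == Max holds, and the default destination is
-    // recognized as unreachable from the switch.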
- APInt Low = Cases.front().Low->getValue(); - APInt High = Cases.back().High->getValue(); - APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low); - APInt Max = APIntOps::smax(ValRange.getSignedMax(), High); - - LowerBound = ConstantInt::get(SI->getContext(), Min); - UpperBound = ConstantInt::get(SI->getContext(), Max); - DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max); - } - - std::vector<IntRange> UnreachableRanges; - - if (DefaultIsUnreachableFromSwitch) { - DenseMap<BasicBlock *, unsigned> Popularity; - unsigned MaxPop = 0; - BasicBlock *PopSucc = nullptr; - - IntRange R = {std::numeric_limits<int64_t>::min(), - std::numeric_limits<int64_t>::max()}; - UnreachableRanges.push_back(R); - for (const auto &I : Cases) { - int64_t Low = I.Low->getSExtValue(); - int64_t High = I.High->getSExtValue(); - - IntRange &LastRange = UnreachableRanges.back(); - if (LastRange.Low == Low) { - // There is nothing left of the previous range. - UnreachableRanges.pop_back(); - } else { - // Terminate the previous range. - assert(Low > LastRange.Low); - LastRange.High = Low - 1; - } - if (High != std::numeric_limits<int64_t>::max()) { - IntRange R = { High + 1, std::numeric_limits<int64_t>::max() }; - UnreachableRanges.push_back(R); - } - - // Count popularity. - int64_t N = High - Low + 1; - unsigned &Pop = Popularity[I.BB]; - if ((Pop += N) > MaxPop) { - MaxPop = Pop; - PopSucc = I.BB; - } - } -#ifndef NDEBUG - /* UnreachableRanges should be sorted and the ranges non-adjacent. */ - for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end(); - I != E; ++I) { - assert(I->Low <= I->High); - auto Next = I + 1; - if (Next != E) { - assert(Next->Low > I->High); - } - } -#endif - - // As the default block in the switch is unreachable, update the PHI nodes - // (remove all of the references to the default block) to reflect this. - const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases; - for (unsigned I = 0; I < NumDefaultEdges; ++I) - Default->removePredecessor(OrigBlock); - - // Use the most popular block as the new default, reducing the number of - // cases. - assert(MaxPop > 0 && PopSucc); - Default = PopSucc; - Cases.erase( - llvm::remove_if( - Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), - Cases.end()); - - // If there are no cases left, just branch. - if (Cases.empty()) { - BranchInst::Create(Default, OrigBlock); - SI->eraseFromParent(); - // As all the cases have been replaced with a single branch, only keep - // one entry in the PHI nodes. - for (unsigned I = 0 ; I < (MaxPop - 1) ; ++I) - PopSucc->removePredecessor(OrigBlock); - return; - } - - // If the condition was a PHI node with the switch block as a predecessor - // removing predecessors may have caused the condition to be erased. - // Getting the condition value again here protects against that. - Val = SI->getCondition(); - } - - // Create a new, empty default block so that the new hierarchy of - // if-then statements go to this and the PHI nodes are happy. - BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default->getIterator(), NewDefault); - BranchInst::Create(Default, NewDefault); - - BasicBlock *SwitchBlock = - switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, OrigBlock, NewDefault, UnreachableRanges); - - // If there are entries in any PHI nodes for the default edge, make sure - // to update them as well. 
- fixPhis(Default, OrigBlock, NewDefault); - - // Branch to our shiny new if-then stuff... - BranchInst::Create(SwitchBlock, OrigBlock); - - // We are now done with the switch instruction, delete it. - BasicBlock *OldDefault = SI->getDefaultDest(); - OrigBlock->getInstList().erase(SI); - - // If the Default block has no more predecessors just add it to DeleteList. - if (pred_begin(OldDefault) == pred_end(OldDefault)) - DeleteList.insert(OldDefault); -} diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp deleted file mode 100644 index cd2c81b6abc8..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ /dev/null @@ -1,115 +0,0 @@ -//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass is a simple pass wrapper around the PromoteMemToReg function call -// exposed by the Utils library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/Mem2Reg.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "mem2reg" - -STATISTIC(NumPromoted, "Number of alloca's promoted"); - -static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, - AssumptionCache &AC) { - std::vector<AllocaInst *> Allocas; - BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - bool Changed = false; - - while (true) { - Allocas.clear(); - - // Find allocas that are safe to promote, by looking at all instructions in - // the entry node - for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) - Allocas.push_back(AI); - - if (Allocas.empty()) - break; - - PromoteMemToReg(Allocas, DT, &AC); - NumPromoted += Allocas.size(); - Changed = true; - } - return Changed; -} - -PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - if (!promoteMemoryToRegister(F, DT, AC)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserveSet<CFGAnalyses>(); - return PA; -} - -namespace { - -struct PromoteLegacyPass : public FunctionPass { - // Pass identification, replacement for typeid - static char ID; - - PromoteLegacyPass() : FunctionPass(ID) { - initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - // runOnFunction - To run this pass, first we calculate the alloca - // instructions that are safe for promotion, then we promote each one. 
-  bool runOnFunction(Function &F) override {
-    if (skipFunction(F))
-      return false;
-
-    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    AssumptionCache &AC =
-        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-    return promoteMemoryToRegister(F, DT, AC);
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.setPreservesCFG();
-  }
-};
-
-} // end anonymous namespace
-
-char PromoteLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to "
-                      "Register",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
-                    false, false)
-
-// createPromoteMemoryToRegister - Provide an entry point to create this pass.
-FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
-  return new PromoteLegacyPass();
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
deleted file mode 100644
index c0b7edc547fd..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===- MetaRenamer.cpp - Rename everything with metasyntactic names -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass renames everything with metasyntactic names. The intent is to use
-// this pass after bugpoint reduction to conceal the nature of the original
-// program.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-
-using namespace llvm;
-
-static const char *const metaNames[] = {
-  // See http://en.wikipedia.org/wiki/Metasyntactic_variable
-  "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
-  "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-};
-
-namespace {
-
-  // This PRNG is from the ISO C spec. It is intentionally simple and
-  // unsuitable for cryptographic use. We're just looking for enough
-  // variety to surprise and delight users.
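-  // Concretely, it is the portable rand() example from the C standard: a
-  // linear congruential generator that steps
-  //   next = next * 1103515245 + 12345
-  // and returns bits 16..30 of the state.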
- struct PRNG { - unsigned long next; - - void srand(unsigned int seed) { - next = seed; - } - - int rand() { - next = next * 1103515245 + 12345; - return (unsigned int)(next / 65536) % 32768; - } - }; - - struct Renamer { - Renamer(unsigned int seed) { - prng.srand(seed); - } - - const char *newName() { - return metaNames[prng.rand() % array_lengthof(metaNames)]; - } - - PRNG prng; - }; - - struct MetaRenamer : public ModulePass { - // Pass identification, replacement for typeid - static char ID; - - MetaRenamer() : ModulePass(ID) { - initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.setPreservesAll(); - } - - bool runOnModule(Module &M) override { - // Seed our PRNG with simple additive sum of ModuleID. We're looking to - // simply avoid always having the same function names, and we need to - // remain deterministic. - unsigned int randSeed = 0; - for (auto C : M.getModuleIdentifier()) - randSeed += C; - - Renamer renamer(randSeed); - - // Rename all aliases - for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { - StringRef Name = AI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - AI->setName("alias"); - } - - // Rename all global variables - for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { - StringRef Name = GI->getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) - continue; - - GI->setName("global"); - } - - // Rename all struct types - TypeFinder StructTypes; - StructTypes.run(M, true); - for (StructType *STy : StructTypes) { - if (STy->isLiteral() || STy->getName().empty()) continue; - - SmallString<128> NameStorage; - STy->setName((Twine("struct.") + - renamer.newName()).toStringRef(NameStorage)); - } - - // Rename all functions - const TargetLibraryInfo &TLI = - getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - for (auto &F : M) { - StringRef Name = F.getName(); - LibFunc Tmp; - // Leave library functions alone because their presence or absence could - // affect the behavior of other passes. - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - TLI.getLibFunc(F, Tmp)) - continue; - - // Leave @main alone. The output of -metarenamer might be passed to - // lli for execution and the latter needs a main entry point. - if (Name != "main") - F.setName(renamer.newName()); - - runOnFunction(F); - } - return true; - } - - bool runOnFunction(Function &F) { - for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->getType()->isVoidTy()) - AI->setName("arg"); - - for (auto &BB : F) { - BB.setName("bb"); - - for (auto &I : BB) - if (!I.getType()->isVoidTy()) - I.setName("tmp"); - } - return true; - } - }; - -} // end anonymous namespace - -char MetaRenamer::ID = 0; - -INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer", - "Assign new names to everything", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(MetaRenamer, "metarenamer", - "Assign new names to everything", false, false) - -//===----------------------------------------------------------------------===// -// -// MetaRenamer - Rename everything with metasyntactic names. 
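-// (Functions get metasyntactic names such as "foo" or "wibble"; globals are
-// renamed to "global", aliases to "alias", arguments to "arg", basic blocks
-// to "bb", and instruction results to "tmp".)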
-// -ModulePass *llvm::createMetaRenamerPass() { - return new MetaRenamer(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp deleted file mode 100644 index c84beceee191..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ /dev/null @@ -1,282 +0,0 @@ -//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This family of functions perform manipulations on Modules. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static void appendToGlobalArray(const char *Array, Module &M, Function *F, - int Priority, Constant *Data) { - IRBuilder<> IRB(M.getContext()); - FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); - - // Get the current set of static global constructors and add the new ctor - // to the list. - SmallVector<Constant *, 16> CurrentCtors; - StructType *EltTy = StructType::get( - IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy()); - if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { - if (Constant *Init = GVCtor->getInitializer()) { - unsigned n = Init->getNumOperands(); - CurrentCtors.reserve(n + 1); - for (unsigned i = 0; i != n; ++i) - CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); - } - GVCtor->eraseFromParent(); - } - - // Build a 3 field global_ctor entry. We don't take a comdat key. - Constant *CSVals[3]; - CSVals[0] = IRB.getInt32(Priority); - CSVals[1] = F; - CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) - : Constant::getNullValue(IRB.getInt8PtrTy()); - Constant *RuntimeCtorInit = - ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); - - CurrentCtors.push_back(RuntimeCtorInit); - - // Create a new initializer. - ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); - Constant *NewInit = ConstantArray::get(AT, CurrentCtors); - - // Create the new global variable and replace all uses of - // the old global variable with the new one. 
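-  // For a single constructor the result looks roughly like (priority 65535
-  // shown as an example value):
-  //   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
-  //     [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }]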
- (void)new GlobalVariable(M, NewInit->getType(), false, - GlobalValue::AppendingLinkage, NewInit, Array); -} - -void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { - appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); -} - -void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { - appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); -} - -static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { - GlobalVariable *GV = M.getGlobalVariable(Name); - SmallPtrSet<Constant *, 16> InitAsSet; - SmallVector<Constant *, 16> Init; - if (GV) { - ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); - for (auto &Op : CA->operands()) { - Constant *C = cast_or_null<Constant>(Op); - if (InitAsSet.insert(C).second) - Init.push_back(C); - } - GV->eraseFromParent(); - } - - Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext()); - for (auto *V : Values) { - Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy); - if (InitAsSet.insert(C).second) - Init.push_back(C); - } - - if (Init.empty()) - return; - - ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size()); - GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, - ConstantArray::get(ATy, Init), Name); - GV->setSection("llvm.metadata"); -} - -void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { - appendToUsedList(M, "llvm.used", Values); -} - -void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { - appendToUsedList(M, "llvm.compiler.used", Values); -} - -FunctionCallee -llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, - ArrayRef<Type *> InitArgTypes) { - assert(!InitName.empty() && "Expected init function name"); - return M.getOrInsertFunction( - InitName, - FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false), - AttributeList()); -} - -std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( - Module &M, StringRef CtorName, StringRef InitName, - ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, - StringRef VersionCheckName) { - assert(!InitName.empty() && "Expected init function name"); - assert(InitArgs.size() == InitArgTypes.size() && - "Sanitizer's init function expects different number of arguments"); - FunctionCallee InitFunction = - declareSanitizerInitFunction(M, InitName, InitArgTypes); - Function *Ctor = Function::Create( - FunctionType::get(Type::getVoidTy(M.getContext()), false), - GlobalValue::InternalLinkage, CtorName, &M); - BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); - IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); - IRB.CreateCall(InitFunction, InitArgs); - if (!VersionCheckName.empty()) { - FunctionCallee VersionCheckFunction = M.getOrInsertFunction( - VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), - AttributeList()); - IRB.CreateCall(VersionCheckFunction, {}); - } - return std::make_pair(Ctor, InitFunction); -} - -std::pair<Function *, FunctionCallee> -llvm::getOrCreateSanitizerCtorAndInitFunctions( - Module &M, StringRef CtorName, StringRef InitName, - ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, - function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, - StringRef VersionCheckName) { - assert(!CtorName.empty() && "Expected ctor function name"); - - if (Function *Ctor = M.getFunction(CtorName)) - // FIXME: Sink this logic into the module, similar to the handling of - // globals. 
This will make moving to a concurrent model much easier. - if (Ctor->arg_size() == 0 || - Ctor->getReturnType() == Type::getVoidTy(M.getContext())) - return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)}; - - Function *Ctor; - FunctionCallee InitFunction; - std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( - M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName); - FunctionsCreatedCallback(Ctor, InitFunction); - return std::make_pair(Ctor, InitFunction); -} - -Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) { - assert(!Name.empty() && "Expected init function name"); - if (Function *F = M.getFunction(Name)) { - if (F->arg_size() != 0 || - F->getReturnType() != Type::getVoidTy(M.getContext())) { - std::string Err; - raw_string_ostream Stream(Err); - Stream << "Sanitizer interface function defined with wrong type: " << *F; - report_fatal_error(Err); - } - return F; - } - Function *F = - cast<Function>(M.getOrInsertFunction(Name, AttributeList(), - Type::getVoidTy(M.getContext())) - .getCallee()); - - appendToGlobalCtors(M, F, 0); - - return F; -} - -void llvm::filterDeadComdatFunctions( - Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) { - // Build a map from the comdat to the number of entries in that comdat we - // think are dead. If this fully covers the comdat group, then the entire - // group is dead. If we find another entry in the comdat group though, we'll - // have to preserve the whole group. - SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered; - for (Function *F : DeadComdatFunctions) { - Comdat *C = F->getComdat(); - assert(C && "Expected all input GVs to be in a comdat!"); - ComdatEntriesCovered[C] += 1; - } - - auto CheckComdat = [&](Comdat &C) { - auto CI = ComdatEntriesCovered.find(&C); - if (CI == ComdatEntriesCovered.end()) - return; - - // If this could have been covered by a dead entry, just subtract one to - // account for it. - if (CI->second > 0) { - CI->second -= 1; - return; - } - - // If we've already accounted for all the entries that were dead, the - // entire comdat is alive so remove it from the map. - ComdatEntriesCovered.erase(CI); - }; - - auto CheckAllComdats = [&] { - for (Function &F : M.functions()) - if (Comdat *C = F.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalVariable &GV : M.globals()) - if (Comdat *C = GV.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - for (GlobalAlias &GA : M.aliases()) - if (Comdat *C = GA.getComdat()) { - CheckComdat(*C); - if (ComdatEntriesCovered.empty()) - return; - } - }; - CheckAllComdats(); - - if (ComdatEntriesCovered.empty()) { - DeadComdatFunctions.clear(); - return; - } - - // Remove the entries that were not covering. 
- erase_if(DeadComdatFunctions, [&](GlobalValue *GV) { - return ComdatEntriesCovered.find(GV->getComdat()) == - ComdatEntriesCovered.end(); - }); -} - -std::string llvm::getUniqueModuleId(Module *M) { - MD5 Md5; - bool ExportsSymbols = false; - auto AddGlobal = [&](GlobalValue &GV) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage() || GV.hasComdat()) - return; - ExportsSymbols = true; - Md5.update(GV.getName()); - Md5.update(ArrayRef<uint8_t>{0}); - }; - - for (auto &F : *M) - AddGlobal(F); - for (auto &GV : M->globals()) - AddGlobal(GV); - for (auto &GA : M->aliases()) - AddGlobal(GA); - for (auto &IF : M->ifuncs()) - AddGlobal(IF); - - if (!ExportsSymbols) - return ""; - - MD5::MD5Result R; - Md5.final(R); - - SmallString<32> Str; - MD5::stringifyResult(R, Str); - return ("$" + Str).str(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp deleted file mode 100644 index ac8991e9d475..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ /dev/null @@ -1,120 +0,0 @@ -//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements naming anonymous globals to make sure they can be -// referred to by ThinLTO. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/NameAnonGlobals.h" - -#include "llvm/ADT/SmallString.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MD5.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" - -using namespace llvm; - -namespace { -// Compute a "unique" hash for the module based on the name of the public -// globals. -class ModuleHasher { - Module &TheModule; - std::string TheHash; - -public: - ModuleHasher(Module &M) : TheModule(M) {} - - /// Return the lazily computed hash. - std::string &get() { - if (!TheHash.empty()) - // Cache hit :) - return TheHash; - - MD5 Hasher; - for (auto &F : TheModule) { - if (F.isDeclaration() || F.hasLocalLinkage() || !F.hasName()) - continue; - auto Name = F.getName(); - Hasher.update(Name); - } - for (auto &GV : TheModule.globals()) { - if (GV.isDeclaration() || GV.hasLocalLinkage() || !GV.hasName()) - continue; - auto Name = GV.getName(); - Hasher.update(Name); - } - - // Now return the result. - MD5::MD5Result Hash; - Hasher.final(Hash); - SmallString<32> Result; - MD5::stringifyResult(Hash, Result); - TheHash = Result.str(); - return TheHash; - } -}; -} // end anonymous namespace - -// Rename all the anon globals in the module -bool llvm::nameUnamedGlobals(Module &M) { - bool Changed = false; - ModuleHasher ModuleHash(M); - int count = 0; - auto RenameIfNeed = [&](GlobalValue &GV) { - if (GV.hasName()) - return; - GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++)); - Changed = true; - }; - for (auto &GO : M.global_objects()) - RenameIfNeed(GO); - for (auto &GA : M.aliases()) - RenameIfNeed(GA); - - return Changed; -} - -namespace { - -// Legacy pass that provides a name to every anon globals. 
-class NameAnonGlobalLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Name Anon Globals"; } - - explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return nameUnamedGlobals(M); } -}; -char NameAnonGlobalLegacyPass::ID = 0; - -} // anonymous namespace - -PreservedAnalyses NameAnonGlobalPass::run(Module &M, - ModuleAnalysisManager &AM) { - if (!nameUnamedGlobals(M)) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} - -INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) -INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) - -namespace llvm { -ModulePass *createNameAnonGlobalPass() { - return new NameAnonGlobalLegacyPass(); -} -} diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp deleted file mode 100644 index bdf24d80bd17..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ /dev/null @@ -1,852 +0,0 @@ -//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------===// -// -// This file implements the PredicateInfo class. -// -//===----------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/PredicateInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DebugCounter.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Utils.h" -#include <algorithm> -#define DEBUG_TYPE "predicateinfo" -using namespace llvm; -using namespace PatternMatch; -using namespace llvm::PredicateInfoClasses; - -INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo", - "PredicateInfo Printer", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo", - "PredicateInfo Printer", false, false) -static cl::opt<bool> VerifyPredicateInfo( - "verify-predicateinfo", cl::init(false), cl::Hidden, - cl::desc("Verify PredicateInfo in legacy printer pass.")); -DEBUG_COUNTER(RenameCounter, "predicateinfo-rename", - "Controls which variables are renamed with predicateinfo"); - -namespace { -// Given a predicate info that is a type of branching terminator, get the -// branching block. 
-const BasicBlock *getBranchBlock(const PredicateBase *PB) { - assert(isa<PredicateWithEdge>(PB) && - "Only branches and switches should have PHIOnly defs that " - "require branch blocks."); - return cast<PredicateWithEdge>(PB)->From; -} - -// Given a predicate info that is a type of branching terminator, get the -// branching terminator. -static Instruction *getBranchTerminator(const PredicateBase *PB) { - assert(isa<PredicateWithEdge>(PB) && - "Not a predicate info type we know how to get a terminator from."); - return cast<PredicateWithEdge>(PB)->From->getTerminator(); -} - -// Given a predicate info that is a type of branching terminator, get the -// edge this predicate info represents -const std::pair<BasicBlock *, BasicBlock *> -getBlockEdge(const PredicateBase *PB) { - assert(isa<PredicateWithEdge>(PB) && - "Not a predicate info type we know how to get an edge from."); - const auto *PEdge = cast<PredicateWithEdge>(PB); - return std::make_pair(PEdge->From, PEdge->To); -} -} - -namespace llvm { -namespace PredicateInfoClasses { -enum LocalNum { - // Operations that must appear first in the block. - LN_First, - // Operations that are somewhere in the middle of the block, and are sorted on - // demand. - LN_Middle, - // Operations that must appear last in a block, like successor phi node uses. - LN_Last -}; - -// Associate global and local DFS info with defs and uses, so we can sort them -// into a global domination ordering. -struct ValueDFS { - int DFSIn = 0; - int DFSOut = 0; - unsigned int LocalNum = LN_Middle; - // Only one of Def or Use will be set. - Value *Def = nullptr; - Use *U = nullptr; - // Neither PInfo nor EdgeOnly participate in the ordering - PredicateBase *PInfo = nullptr; - bool EdgeOnly = false; -}; - -// Perform a strict weak ordering on instructions and arguments. -static bool valueComesBefore(OrderedInstructions &OI, const Value *A, - const Value *B) { - auto *ArgA = dyn_cast_or_null<Argument>(A); - auto *ArgB = dyn_cast_or_null<Argument>(B); - if (ArgA && !ArgB) - return true; - if (ArgB && !ArgA) - return false; - if (ArgA && ArgB) - return ArgA->getArgNo() < ArgB->getArgNo(); - return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B)); -} - -// This compares ValueDFS structures, creating OrderedBasicBlocks where -// necessary to compare uses/defs in the same block. Doing so allows us to walk -// the minimum number of instructions necessary to compute our def/use ordering. -struct ValueDFS_Compare { - OrderedInstructions &OI; - ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} - - bool operator()(const ValueDFS &A, const ValueDFS &B) const { - if (&A == &B) - return false; - // The only case we can't directly compare them is when they in the same - // block, and both have localnum == middle. In that case, we have to use - // comesbefore to see what the real ordering is, because they are in the - // same basic block. - - bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); - - // We want to put the def that will get used for a given set of phi uses, - // before those phi uses. - // So we sort by edge, then by def. - // Note that only phi nodes uses and defs can come last. 
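ValueDFS_Compare's comparisons are ordinary C++ at heart: std::tie builds the lexicographic strict weak ordering that stable_sort requires, with the DFS-in/DFS-out numbers first and the within-block position as a tiebreaker. A compact standalone sketch of that pattern (the Event type and its values are illustrative only):

#include <algorithm>
#include <cstdio>
#include <tuple>
#include <vector>

struct Event { int DFSIn, DFSOut, LocalNum; const char *Tag; };

int main() {
  std::vector<Event> Events = {{2, 5, 2, "phi use (LN_Last)"},
                               {2, 5, 0, "copy (LN_First)"},
                               {1, 8, 1, "def (LN_Middle)"}};
  // std::tie yields exactly the lexicographic ordering ValueDFS_Compare
  // uses; stable_sort preserves the input order of genuinely equal keys.
  std::stable_sort(Events.begin(), Events.end(),
                   [](const Event &A, const Event &B) {
                     return std::tie(A.DFSIn, A.DFSOut, A.LocalNum) <
                            std::tie(B.DFSIn, B.DFSOut, B.LocalNum);
                   });
  for (const Event &E : Events)
    std::printf("(%d,%d,%d) %s\n", E.DFSIn, E.DFSOut, E.LocalNum, E.Tag);
}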
- if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) - return comparePHIRelated(A, B); - - if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) - return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) < - std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U); - return localComesBefore(A, B); - } - - // For a phi use, or a non-materialized def, return the edge it represents. - const std::pair<BasicBlock *, BasicBlock *> - getBlockEdge(const ValueDFS &VD) const { - if (!VD.Def && VD.U) { - auto *PHI = cast<PHINode>(VD.U->getUser()); - return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent()); - } - // This is really a non-materialized def. - return ::getBlockEdge(VD.PInfo); - } - - // For two phi related values, return the ordering. - bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { - auto &ABlockEdge = getBlockEdge(A); - auto &BBlockEdge = getBlockEdge(B); - // Now sort by block edge and then defs before uses. - return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U); - } - - // Get the definition of an instruction that occurs in the middle of a block. - Value *getMiddleDef(const ValueDFS &VD) const { - if (VD.Def) - return VD.Def; - // It's possible for the defs and uses to be null. For branches, the local - // numbering will say the placed predicateinfos should go first (IE - // LN_beginning), so we won't be in this function. For assumes, we will end - // up here, because we need to order the def we will place relative to the - // assume. So for the purpose of ordering, we pretend the def is the assume - // because that is where we will insert the info. - if (!VD.U) { - assert(VD.PInfo && - "No def, no use, and no predicateinfo should not occur"); - assert(isa<PredicateAssume>(VD.PInfo) && - "Middle of block should only occur for assumes"); - return cast<PredicateAssume>(VD.PInfo)->AssumeInst; - } - return nullptr; - } - - // Return either the Def, if it's not null, or the user of the Use, if the def - // is null. - const Instruction *getDefOrUser(const Value *Def, const Use *U) const { - if (Def) - return cast<Instruction>(Def); - return cast<Instruction>(U->getUser()); - } - - // This performs the necessary local basic block ordering checks to tell - // whether A comes before B, where both are in the same basic block. - bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const { - auto *ADef = getMiddleDef(A); - auto *BDef = getMiddleDef(B); - - // See if we have real values or uses. If we have real values, we are - // guaranteed they are instructions or arguments. No matter what, we are - // guaranteed they are in the same block if they are instructions. - auto *ArgA = dyn_cast_or_null<Argument>(ADef); - auto *ArgB = dyn_cast_or_null<Argument>(BDef); - - if (ArgA || ArgB) - return valueComesBefore(OI, ArgA, ArgB); - - auto *AInst = getDefOrUser(ADef, A.U); - auto *BInst = getDefOrUser(BDef, B.U); - return valueComesBefore(OI, AInst, BInst); - } -}; - -} // namespace PredicateInfoClasses - -bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, - const ValueDFS &VDUse) const { - if (Stack.empty()) - return false; - // If it's a phi only use, make sure it's for this phi node edge, and that the - // use is in a phi node. If it's anything else, and the top of the stack is - // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to - // the defs they must go with so that we can know it's time to pop the stack - // when we hit the end of the phi uses for a given def.
- if (Stack.back().EdgeOnly) { - if (!VDUse.U) - return false; - auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser()); - if (!PHI) - return false; - // Check edge - BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U); - if (EdgePred != getBranchBlock(Stack.back().PInfo)) - return false; - - // Use dominates, which knows how to handle edge dominance. - return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U); - } - - return (VDUse.DFSIn >= Stack.back().DFSIn && - VDUse.DFSOut <= Stack.back().DFSOut); -} - -void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, - const ValueDFS &VD) { - while (!Stack.empty() && !stackIsInScope(Stack, VD)) - Stack.pop_back(); -} - -// Convert the uses of Op into a vector of uses, associating global and local -// DFS info with each one. -void PredicateInfo::convertUsesToDFSOrdered( - Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) { - for (auto &U : Op->uses()) { - if (auto *I = dyn_cast<Instruction>(U.getUser())) { - ValueDFS VD; - // Put the phi node uses in the incoming block. - BasicBlock *IBlock; - if (auto *PN = dyn_cast<PHINode>(I)) { - IBlock = PN->getIncomingBlock(U); - // Make phi node users appear last in the incoming block - // they are from. - VD.LocalNum = LN_Last; - } else { - // If it's not a phi node use, it is somewhere in the middle of the - // block. - IBlock = I->getParent(); - VD.LocalNum = LN_Middle; - } - DomTreeNode *DomNode = DT.getNode(IBlock); - // It's possible our use is in an unreachable block. Skip it if so. - if (!DomNode) - continue; - VD.DFSIn = DomNode->getDFSNumIn(); - VD.DFSOut = DomNode->getDFSNumOut(); - VD.U = &U; - DFSOrderedSet.push_back(VD); - } - } -} - -// Collect relevant operations from Comparison that we may want to insert copies -// for. -void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { - auto *Op0 = Comparison->getOperand(0); - auto *Op1 = Comparison->getOperand(1); - if (Op0 == Op1) - return; - CmpOperands.push_back(Comparison); - // Only want real values, not constants. Additionally, operands with one use - // are only being used in the comparison, which means they will not be useful - // for us to consider for predicateinfo. - // - if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse()) - CmpOperands.push_back(Op0); - if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse()) - CmpOperands.push_back(Op1); -} - -// Add Op, PB to the list of value infos for Op, and mark Op to be renamed. -void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op, - PredicateBase *PB) { - OpsToRename.insert(Op); - auto &OperandInfo = getOrCreateValueInfo(Op); - AllInfos.push_back(PB); - OperandInfo.Infos.push_back(PB); -} - -// Process an assume instruction and place relevant operations we want to rename -// into OpsToRename. 
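Outside the phi-only case, stackIsInScope above reduces dominance to interval containment on the dominator tree's DFS numbers: a stack entry dominates a use iff its [DFSIn, DFSOut] interval encloses the use's. A minimal sketch of that check and of the pop-until-in-scope loop; ScopeEntry and both function names here are hypothetical:

#include <vector>

struct ScopeEntry { int DFSIn, DFSOut; }; // hypothetical stand-in

// A dominates B iff A's DFS interval encloses B's in a dominator-tree
// DFS numbering; this is the non-EdgeOnly test in stackIsInScope.
static bool inScope(const ScopeEntry &Top, int UseIn, int UseOut) {
  return UseIn >= Top.DFSIn && UseOut <= Top.DFSOut;
}

// Mirror of popStackUntilDFSScope: discard entries that no longer
// enclose the current position.
static void popUntilScope(std::vector<ScopeEntry> &Stack, int UseIn,
                          int UseOut) {
  while (!Stack.empty() && !inScope(Stack.back(), UseIn, UseOut))
    Stack.pop_back();
}

int main() {
  std::vector<ScopeEntry> Stack = {{1, 10}, {2, 5}};
  popUntilScope(Stack, 6, 9); // escapes (2,5) but stays inside (1,10)
  return Stack.size() == 1 ? 0 : 1;
}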
-void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, - SmallPtrSetImpl<Value *> &OpsToRename) { - // See if we have a comparison we support - SmallVector<Value *, 8> CmpOperands; - SmallVector<Value *, 2> ConditionsToProcess; - CmpInst::Predicate Pred; - Value *Operand = II->getOperand(0); - if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value())) - .match(II->getOperand(0))) { - ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0)); - ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1)); - ConditionsToProcess.push_back(Operand); - } else if (isa<CmpInst>(Operand)) { - - ConditionsToProcess.push_back(Operand); - } - for (auto Cond : ConditionsToProcess) { - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { - collectCmpOps(Cmp, CmpOperands); - // Now add our copy infos for our operands - for (auto *Op : CmpOperands) { - auto *PA = new PredicateAssume(Op, II, Cmp); - addInfoFor(OpsToRename, Op, PA); - } - CmpOperands.clear(); - } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { - // Otherwise, it should be an AND. - assert(BinOp->getOpcode() == Instruction::And && - "Should have been an AND"); - auto *PA = new PredicateAssume(BinOp, II, BinOp); - addInfoFor(OpsToRename, BinOp, PA); - } else { - llvm_unreachable("Unknown type of condition"); - } - } -} - -// Process a block terminating branch, and place relevant operations to be -// renamed into OpsToRename. -void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, - SmallPtrSetImpl<Value *> &OpsToRename) { - BasicBlock *FirstBB = BI->getSuccessor(0); - BasicBlock *SecondBB = BI->getSuccessor(1); - SmallVector<BasicBlock *, 2> SuccsToProcess; - SuccsToProcess.push_back(FirstBB); - SuccsToProcess.push_back(SecondBB); - SmallVector<Value *, 2> ConditionsToProcess; - - auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) { - for (auto *Succ : SuccsToProcess) { - // Don't try to insert on a self-edge. This is mainly because we will - // eliminate during renaming anyway. - if (Succ == BranchBB) - continue; - bool TakenEdge = (Succ == FirstBB); - // For and, only insert on the true edge - // For or, only insert on the false edge - if ((isAnd && !TakenEdge) || (isOr && TakenEdge)) - continue; - PredicateBase *PB = - new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge); - addInfoFor(OpsToRename, Op, PB); - if (!Succ->getSinglePredecessor()) - EdgeUsesOnly.insert({BranchBB, Succ}); - } - }; - - // Match combinations of conditions. 
- CmpInst::Predicate Pred; - bool isAnd = false; - bool isOr = false; - SmallVector<Value *, 8> CmpOperands; - if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value()))) || - match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()), - m_Cmp(Pred, m_Value(), m_Value())))) { - auto *BinOp = cast<BinaryOperator>(BI->getCondition()); - if (BinOp->getOpcode() == Instruction::And) - isAnd = true; - else if (BinOp->getOpcode() == Instruction::Or) - isOr = true; - ConditionsToProcess.push_back(BinOp->getOperand(0)); - ConditionsToProcess.push_back(BinOp->getOperand(1)); - ConditionsToProcess.push_back(BI->getCondition()); - } else if (isa<CmpInst>(BI->getCondition())) { - ConditionsToProcess.push_back(BI->getCondition()); - } - for (auto Cond : ConditionsToProcess) { - if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { - collectCmpOps(Cmp, CmpOperands); - // Now add our copy infos for our operands - for (auto *Op : CmpOperands) - InsertHelper(Op, isAnd, isOr, Cmp); - } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { - // This must be an AND or an OR. - assert((BinOp->getOpcode() == Instruction::And || - BinOp->getOpcode() == Instruction::Or) && - "Should have been an AND or an OR"); - // The actual value of the binop is not subject to the same restrictions - // as the comparison. It's either true or false on the true/false branch. - InsertHelper(BinOp, false, false, BinOp); - } else { - llvm_unreachable("Unknown type of condition"); - } - CmpOperands.clear(); - } -} -// Process a block terminating switch, and place relevant operations to be -// renamed into OpsToRename. -void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB, - SmallPtrSetImpl<Value *> &OpsToRename) { - Value *Op = SI->getCondition(); - if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse()) - return; - - // Remember how many outgoing edges there are to every successor. - SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; - for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { - BasicBlock *TargetBlock = SI->getSuccessor(i); - ++SwitchEdges[TargetBlock]; - } - - // Now propagate info for each case value - for (auto C : SI->cases()) { - BasicBlock *TargetBlock = C.getCaseSuccessor(); - if (SwitchEdges.lookup(TargetBlock) == 1) { - PredicateSwitch *PS = new PredicateSwitch( - Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI); - addInfoFor(OpsToRename, Op, PS); - if (!TargetBlock->getSinglePredecessor()) - EdgeUsesOnly.insert({BranchBB, TargetBlock}); - } - } -} - -// Build predicate info for our function -void PredicateInfo::buildPredicateInfo() { - DT.updateDFSNumbers(); - // Collect operands to rename from all conditional branch terminators, as well - // as assume statements. - SmallPtrSet<Value *, 8> OpsToRename; - for (auto DTN : depth_first(DT.getRootNode())) { - BasicBlock *BranchBB = DTN->getBlock(); - if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) { - if (!BI->isConditional()) - continue; - // Can't insert conditional information if they all go to the same place. 
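processSwitch above only attaches a case predicate when exactly one switch edge reaches a successor; if two cases share a target block, neither case value is known on entry to it. The edge counting in miniature, with integer ids standing in for blocks (the values are illustrative):

#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
  // Successor block ids targeted by the cases of one switch.
  std::vector<int> CaseTargets = {1, 2, 2, 3};
  std::unordered_map<int, unsigned> SwitchEdges;
  for (int T : CaseTargets)
    ++SwitchEdges[T]; // same counting as the SwitchEdges map above
  for (int T : CaseTargets) {
    if (SwitchEdges[T] == 1)
      std::printf("block %d: case value known, predicate attached\n", T);
    else
      std::printf("block %d: multiple in-edges, no predicate\n", T);
  }
}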
- if (BI->getSuccessor(0) == BI->getSuccessor(1)) - continue; - processBranch(BI, BranchBB, OpsToRename); - } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) { - processSwitch(SI, BranchBB, OpsToRename); - } - } - for (auto &Assume : AC.assumptions()) { - if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume)) - if (DT.isReachableFromEntry(II->getParent())) - processAssume(II, II->getParent(), OpsToRename); - } - // Now rename all our operations. - renameUses(OpsToRename); -} - -// Create a ssa_copy declaration with custom mangling, because -// Intrinsic::getDeclaration does not handle overloaded unnamed types properly: -// all unnamed types get mangled to the same string. We use the pointer -// to the type as name here, as it guarantees unique names for different -// types and we remove the declarations when destroying PredicateInfo. -// It is a workaround for PR38117, because solving it in a fully general way is -// tricky (FIXME). -static Function *getCopyDeclaration(Module *M, Type *Ty) { - std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty); - return cast<Function>( - M->getOrInsertFunction(Name, - getType(M->getContext(), Intrinsic::ssa_copy, Ty)) - .getCallee()); -} - -// Given the renaming stack, make all the operands currently on the stack real -// by inserting them into the IR. Return the last operation's value. -Value *PredicateInfo::materializeStack(unsigned int &Counter, - ValueDFSStack &RenameStack, - Value *OrigOp) { - // Find the first thing we have to materialize - auto RevIter = RenameStack.rbegin(); - for (; RevIter != RenameStack.rend(); ++RevIter) - if (RevIter->Def) - break; - - size_t Start = RevIter - RenameStack.rbegin(); - // The maximum number of things we should be trying to materialize at once - // right now is 4, depending on if we had an assume, a branch, and both used - // and of conditions. - for (auto RenameIter = RenameStack.end() - Start; - RenameIter != RenameStack.end(); ++RenameIter) { - auto *Op = - RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def; - ValueDFS &Result = *RenameIter; - auto *ValInfo = Result.PInfo; - // For edge predicates, we can just place the operand in the block before - // the terminator. For assume, we have to place it right before the assume - // to ensure we dominate all of our uses. Always insert right before the - // relevant instruction (terminator, assume), so that we insert in proper - // order in the case of multiple predicateinfo in the same block. - if (isa<PredicateWithEdge>(ValInfo)) { - IRBuilder<> B(getBranchTerminator(ValInfo)); - Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) - CreatedDeclarations.insert(IF); - CallInst *PIC = - B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); - PredicateMap.insert({PIC, ValInfo}); - Result.Def = PIC; - } else { - auto *PAssume = dyn_cast<PredicateAssume>(ValInfo); - assert(PAssume && - "Should not have gotten here without it being an assume"); - IRBuilder<> B(PAssume->AssumeInst); - Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) - CreatedDeclarations.insert(IF); - CallInst *PIC = B.CreateCall(IF, Op); - PredicateMap.insert({PIC, ValInfo}); - Result.Def = PIC; - } - } - return RenameStack.back().Def; -} - -// Instead of the standard SSA renaming algorithm, which is O(Number of -// instructions), and walks the entire dominator tree, we walk only the defs + -// uses. 
The standard SSA renaming algorithm does not really rely on the -// dominator tree except to order the stack push/pops of the renaming stacks, so -// that defs end up getting pushed before hitting the correct uses. This does -// not require the dominator tree, only the *order* of the dominator tree. The -// complete and correct ordering of the defs and uses, in dominator tree is -// contained in the DFS numbering of the dominator tree. So we sort the defs and -// uses into the DFS ordering, and then just use the renaming stack as per -// normal, pushing when we hit a def (which is a predicateinfo instruction), -// popping when we are out of the dfs scope for that def, and replacing any uses -// with top of stack if it exists. In order to handle liveness without -// propagating liveness info, we don't actually insert the predicateinfo -// instruction def until we see a use that it would dominate. Once we see such -// a use, we materialize the predicateinfo instruction in the right place and -// use it. -// -// TODO: Use this algorithm to perform fast single-variable renaming in -// promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { - // Sort OpsToRename since we are going to iterate it. - SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end()); - auto Comparator = [&](const Value *A, const Value *B) { - return valueComesBefore(OI, A, B); - }; - llvm::sort(OpsToRename, Comparator); - ValueDFS_Compare Compare(OI); - // Compute liveness, and rename in O(uses) per Op. - for (auto *Op : OpsToRename) { - LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); - unsigned Counter = 0; - SmallVector<ValueDFS, 16> OrderedUses; - const auto &ValueInfo = getValueInfo(Op); - // Insert the possible copies into the def/use list. - // They will become real copies if we find a real use for them, and never - // created otherwise. - for (auto &PossibleCopy : ValueInfo.Infos) { - ValueDFS VD; - // Determine where we are going to place the copy by the copy type. - // The predicate info for branches always come first, they will get - // materialized in the split block at the top of the block. - // The predicate info for assumes will be somewhere in the middle, - // it will get materialized in front of the assume. - if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) { - VD.LocalNum = LN_Middle; - DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent()); - if (!DomNode) - continue; - VD.DFSIn = DomNode->getDFSNumIn(); - VD.DFSOut = DomNode->getDFSNumOut(); - VD.PInfo = PossibleCopy; - OrderedUses.push_back(VD); - } else if (isa<PredicateWithEdge>(PossibleCopy)) { - // If we can only do phi uses, we treat it like it's in the branch - // block, and handle it specially. We know that it goes last, and only - // dominate phi uses. - auto BlockEdge = getBlockEdge(PossibleCopy); - if (EdgeUsesOnly.count(BlockEdge)) { - VD.LocalNum = LN_Last; - auto *DomNode = DT.getNode(BlockEdge.first); - if (DomNode) { - VD.DFSIn = DomNode->getDFSNumIn(); - VD.DFSOut = DomNode->getDFSNumOut(); - VD.PInfo = PossibleCopy; - VD.EdgeOnly = true; - OrderedUses.push_back(VD); - } - } else { - // Otherwise, we are in the split block (even though we perform - // insertion in the branch block). - // Insert a possible copy at the split block and before the branch. 
- VD.LocalNum = LN_First; - auto *DomNode = DT.getNode(BlockEdge.second); - if (DomNode) { - VD.DFSIn = DomNode->getDFSNumIn(); - VD.DFSOut = DomNode->getDFSNumOut(); - VD.PInfo = PossibleCopy; - OrderedUses.push_back(VD); - } - } - } - } - - convertUsesToDFSOrdered(Op, OrderedUses); - // Here we require a stable sort because we do not bother to try to - // assign an order to the operands the uses represent. Thus, two - // uses in the same instruction do not have a strict sort order - // currently and will be considered equal. We could get rid of the - // stable sort by creating one if we wanted. - llvm::stable_sort(OrderedUses, Compare); - SmallVector<ValueDFS, 8> RenameStack; - // For each use, sorted into dfs order, push values and replaces uses with - // top of stack, which will represent the reaching def. - for (auto &VD : OrderedUses) { - // We currently do not materialize copy over copy, but we should decide if - // we want to. - bool PossibleCopy = VD.PInfo != nullptr; - if (RenameStack.empty()) { - LLVM_DEBUG(dbgs() << "Rename Stack is empty\n"); - } else { - LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" - << RenameStack.back().DFSIn << "," - << RenameStack.back().DFSOut << ")\n"); - } - - LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," - << VD.DFSOut << ")\n"); - - bool ShouldPush = (VD.Def || PossibleCopy); - bool OutOfScope = !stackIsInScope(RenameStack, VD); - if (OutOfScope || ShouldPush) { - // Sync to our current scope. - popStackUntilDFSScope(RenameStack, VD); - if (ShouldPush) { - RenameStack.push_back(VD); - } - } - // If we get to this point, and the stack is empty we must have a use - // with no renaming needed, just skip it. - if (RenameStack.empty()) - continue; - // Skip values, only want to rename the uses - if (VD.Def || PossibleCopy) - continue; - if (!DebugCounter::shouldExecute(RenameCounter)) { - LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n"); - continue; - } - ValueDFS &Result = RenameStack.back(); - - // If the possible copy dominates something, materialize our stack up to - // this point. This ensures every comparison that affects our operation - // ends up with predicateinfo. 
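The push/pop discipline in the loop above distills to a few lines: visit defs and uses in dominator-DFS order, push defs, pop entries whose DFS interval no longer encloses the current position, and let the stack top be the reaching def. A self-contained sketch under that framing (the Node type and sample values are illustrative):

#include <cstdio>
#include <string>
#include <vector>

struct Node { int In, Out; bool IsDef; std::string Name; };

int main() {
  // Already sorted by (In, Out), as llvm::stable_sort guarantees above.
  std::vector<Node> Ordered = {{1, 10, true, "d1"},
                               {2, 5, false, "u1"},
                               {6, 9, true, "d2"},
                               {7, 8, false, "u2"},
                               {11, 12, false, "u3"}};
  std::vector<Node> Stack;
  for (const Node &N : Ordered) {
    // Pop defs whose DFS interval no longer encloses this node.
    while (!Stack.empty() &&
           !(N.In >= Stack.back().In && N.Out <= Stack.back().Out))
      Stack.pop_back();
    if (N.IsDef) {
      Stack.push_back(N);
      continue;
    }
    std::printf("%s reached by %s\n", N.Name.c_str(),
                Stack.empty() ? "<original value>"
                              : Stack.back().Name.c_str());
  }
}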
- if (!Result.Def) - Result.Def = materializeStack(Counter, RenameStack, Op); - - LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " - << *VD.U->get() << " in " << *(VD.U->getUser()) - << "\n"); - assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) && - "Predicateinfo def should have dominated this use"); - VD.U->set(Result.Def); - } - } -} - -PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) { - auto OIN = ValueInfoNums.find(Operand); - if (OIN == ValueInfoNums.end()) { - // This will grow it - ValueInfos.resize(ValueInfos.size() + 1); - // This will use the new size and give us a 0 based number of the info - auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1}); - assert(InsertResult.second && "Value info number already existed?"); - return ValueInfos[InsertResult.first->second]; - } - return ValueInfos[OIN->second]; -} - -const PredicateInfo::ValueInfo & -PredicateInfo::getValueInfo(Value *Operand) const { - auto OINI = ValueInfoNums.lookup(Operand); - assert(OINI != 0 && "Operand was not really in the Value Info Numbers"); - assert(OINI < ValueInfos.size() && - "Value Info Number greater than size of Value Info Table"); - return ValueInfos[OINI]; -} - -PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, - AssumptionCache &AC) - : F(F), DT(DT), AC(AC), OI(&DT) { - // Push an empty operand info so that we can detect 0 as not finding one - ValueInfos.resize(1); - buildPredicateInfo(); -} - -// Remove all declarations we created. The PredicateInfo consumers are -// responsible for removing the ssa_copy calls created. -PredicateInfo::~PredicateInfo() { - // Collect function pointers in set first, as SmallSet uses a SmallVector - // internally and we have to remove the asserting value handles first. - SmallPtrSet<Function *, 20> FunctionPtrs; - for (auto &F : CreatedDeclarations) - FunctionPtrs.insert(&*F); - CreatedDeclarations.clear(); - - for (Function *F : FunctionPtrs) { - assert(F->user_begin() == F->user_end() && - "PredicateInfo consumer did not remove all SSA copies."); - F->eraseFromParent(); - } -} - -void PredicateInfo::verifyPredicateInfo() const {} - -char PredicateInfoPrinterLegacyPass::ID = 0; - -PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass() - : FunctionPass(ID) { - initializePredicateInfoPrinterLegacyPassPass( - *PassRegistry::getPassRegistry()); -} - -void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); -} - -// Replace ssa_copy calls created by PredicateInfo with their operand.
-static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { - for (auto I = inst_begin(F), E = inst_end(F); I != E;) { - Instruction *Inst = &*I++; - const auto *PI = PredInfo.getPredicateInfoFor(Inst); - auto *II = dyn_cast<IntrinsicInst>(Inst); - if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) - continue; - - Inst->replaceAllUsesWith(II->getOperand(0)); - Inst->eraseFromParent(); - } -} - -bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); - PredInfo->print(dbgs()); - if (VerifyPredicateInfo) - PredInfo->verifyPredicateInfo(); - - replaceCreatedSSACopys(*PredInfo, F); - return false; -} - -PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - OS << "PredicateInfo for function: " << F.getName() << "\n"; - auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); - PredInfo->print(OS); - - replaceCreatedSSACopys(*PredInfo, F); - return PreservedAnalyses::all(); -} - -/// An assembly annotator class to print PredicateInfo information in -/// comments. -class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { - friend class PredicateInfo; - const PredicateInfo *PredInfo; - -public: - PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {} - - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) {} - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - if (const auto *PI = PredInfo->getPredicateInfoFor(I)) { - OS << "; Has predicate info\n"; - if (const auto *PB = dyn_cast<PredicateBranch>(PI)) { - OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge - << " Comparison:" << *PB->Condition << " Edge: ["; - PB->From->printAsOperand(OS); - OS << ","; - PB->To->printAsOperand(OS); - OS << "] }\n"; - } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) { - OS << "; switch predicate info { CaseValue: " << *PS->CaseValue - << " Switch:" << *PS->Switch << " Edge: ["; - PS->From->printAsOperand(OS); - OS << ","; - PS->To->printAsOperand(OS); - OS << "] }\n"; - } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) { - OS << "; assume predicate info {" - << " Comparison:" << *PA->Condition << " }\n"; - } - } - } -}; - -void PredicateInfo::print(raw_ostream &OS) const { - PredicateInfoAnnotatedWriter Writer(this); - F.print(OS, &Writer); -} - -void PredicateInfo::dump() const { - PredicateInfoAnnotatedWriter Writer(this); - F.print(dbgs(), &Writer); -} - -PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &AC = AM.getResult<AssumptionAnalysis>(F); - make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo(); - - return PreservedAnalyses::all(); -} -} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp deleted file mode 100644 index d58e1ea574ef..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ /dev/null @@ -1,1007 +0,0 @@ -//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file promotes memory references to be register references. It promotes -// alloca instructions which only have loads and stores as uses. An alloca is -// transformed by using iterated dominator frontiers to place PHI nodes, then -// traversing the function in depth-first order to rewrite loads and stores as -// appropriate. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/User.h" -#include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include <algorithm> -#include <cassert> -#include <iterator> -#include <utility> -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "mem2reg" - -STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); -STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); -STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); -STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); - -bool llvm::isAllocaPromotable(const AllocaInst *AI) { - // FIXME: If the memory unit is of pointer or integer type, we can permit - // assignments to subsections of the memory unit. - unsigned AS = AI->getType()->getAddressSpace(); - - // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Note that atomic loads can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (LI->isVolatile()) - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. - // Note that atomic stores can be transformed; atomic semantics do - // not have any meaning for a local alloca. 
- if (SI->isVolatile()) - return false; - } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (!II->isLifetimeStartOrEnd()) - return false; - } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) - return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!GEPI->hasAllZeroIndices()) - return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) - return false; - } else { - return false; - } - } - - return true; -} - -namespace { - -struct AllocaInfo { - SmallVector<BasicBlock *, 32> DefiningBlocks; - SmallVector<BasicBlock *, 32> UsingBlocks; - - StoreInst *OnlyStore; - BasicBlock *OnlyBlock; - bool OnlyUsedInOneBlock; - - TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares; - - void clear() { - DefiningBlocks.clear(); - UsingBlocks.clear(); - OnlyStore = nullptr; - OnlyBlock = nullptr; - OnlyUsedInOneBlock = true; - DbgDeclares.clear(); - } - - /// Scan the uses of the specified alloca, filling in the AllocaInfo used - /// by the rest of the pass to reason about the uses of this alloca. - void AnalyzeAlloca(AllocaInst *AI) { - clear(); - - // As we scan the uses of the alloca instruction, keep track of stores, - // and decide whether all of the loads and stores to the alloca are within - // the same basic block. - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI->getParent()); - OnlyStore = SI; - } else { - LoadInst *LI = cast<LoadInst>(User); - // Otherwise it must be a load instruction, keep track of variable - // reads. - UsingBlocks.push_back(LI->getParent()); - } - - if (OnlyUsedInOneBlock) { - if (!OnlyBlock) - OnlyBlock = User->getParent(); - else if (OnlyBlock != User->getParent()) - OnlyUsedInOneBlock = false; - } - } - - DbgDeclares = FindDbgAddrUses(AI); - } -}; - -/// Data package used by RenamePass(). -struct RenamePassData { - using ValVector = std::vector<Value *>; - using LocationVector = std::vector<DebugLoc>; - - RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) - : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {} - - BasicBlock *BB; - BasicBlock *Pred; - ValVector Values; - LocationVector Locations; -}; - -/// This assigns and keeps a per-bb relative ordering of load/store -/// instructions in the block that directly load or store an alloca. -/// -/// This functionality is important because it avoids scanning large basic -/// blocks multiple times when promoting many allocas in the same block. -class LargeBlockInfo { - /// For each instruction that we track, keep the index of the - /// instruction. - /// - /// The index starts out as the number of the instruction from the start of - /// the block. - DenseMap<const Instruction *, unsigned> InstNumbers; - -public: - - /// This code only looks at accesses to allocas. - static bool isInterestingInstruction(const Instruction *I) { - return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || - (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); - } - - /// Get or calculate the index of the specified instruction. 
- unsigned getInstructionIndex(const Instruction *I) { - assert(isInterestingInstruction(I) && - "Not a load/store to/from an alloca?"); - - // If we already have this instruction number, return it. - DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); - if (It != InstNumbers.end()) - return It->second; - - // Scan the whole block to get the instruction. This accumulates - // information for every interesting instruction in the block, in order to - // avoid gratuitous rescans. - const BasicBlock *BB = I->getParent(); - unsigned InstNo = 0; - for (const Instruction &BBI : *BB) - if (isInterestingInstruction(&BBI)) - InstNumbers[&BBI] = InstNo++; - It = InstNumbers.find(I); - - assert(It != InstNumbers.end() && "Didn't insert instruction?"); - return It->second; - } - - void deleteValue(const Instruction *I) { InstNumbers.erase(I); } - - void clear() { InstNumbers.clear(); } -}; - -struct PromoteMem2Reg { - /// The alloca instructions being promoted. - std::vector<AllocaInst *> Allocas; - - DominatorTree &DT; - DIBuilder DIB; - - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. - AssumptionCache *AC; - - const SimplifyQuery SQ; - - /// Reverse mapping of Allocas. - DenseMap<AllocaInst *, unsigned> AllocaLookup; - - /// The PhiNodes we're adding. - /// - /// That map is used to simplify some Phi nodes as we iterate over it, so - /// it should have deterministic iterators. We could use a MapVector, but - /// since we already maintain a map from BasicBlock* to a stable numbering - /// (BBNumbers), the DenseMap is more efficient (also supports removal). - DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; - - /// For each PHI node, keep track of which entry in Allocas it corresponds - /// to. - DenseMap<PHINode *, unsigned> PhiToAllocaMap; - - /// For each alloca, we keep track of the dbg.declare intrinsic that - /// describes it, if any, so that we can convert it to a dbg.value - /// intrinsic if the alloca gets promoted. - SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares; - - /// The set of basic blocks the renamer has already visited. - SmallPtrSet<BasicBlock *, 16> Visited; - - /// Contains a stable numbering of basic blocks to avoid non-deterministic - /// behavior. - DenseMap<BasicBlock *, unsigned> BBNumbers; - - /// Lazily compute the number of predecessors a block has.
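LargeBlockInfo's getInstructionIndex, shown at the top of this hunk, numbers every interesting instruction of a block on the first query so that later queries are hash lookups rather than rescans. The same memoization in miniature; strings stand in for Instruction pointers and BlockIndex is a hypothetical name:

#include <string>
#include <unordered_map>
#include <vector>

// Number the instructions of a block once, on the first query, so
// later queries are O(1) lookups instead of repeated linear scans.
class BlockIndex {
  std::unordered_map<std::string, unsigned> InstNumbers;
  const std::vector<std::string> &Block; // stand-in for a BasicBlock

public:
  explicit BlockIndex(const std::vector<std::string> &B) : Block(B) {}

  unsigned indexOf(const std::string &Inst) {
    auto It = InstNumbers.find(Inst);
    if (It != InstNumbers.end())
      return It->second;
    unsigned N = 0;
    for (const std::string &I : Block) // one scan numbers everything
      InstNumbers[I] = N++;
    return InstNumbers.at(Inst); // throws if Inst is not in the block
  }
};

int main() {
  std::vector<std::string> Block = {"store1", "load1", "store2"};
  BlockIndex Idx(Block);
  return Idx.indexOf("load1") == 1 ? 0 : 1; // first query scans once
}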
- DenseMap<const BasicBlock *, unsigned> BBNumPreds; - -public: - PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AssumptionCache *AC) - : Allocas(Allocas.begin(), Allocas.end()), DT(DT), - DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), - nullptr, &DT, AC) {} - - void run(); - -private: - void RemoveFromAllocasList(unsigned &AllocaIdx) { - Allocas[AllocaIdx] = Allocas.back(); - Allocas.pop_back(); - --AllocaIdx; - } - - unsigned getNumPreds(const BasicBlock *BB) { - unsigned &NP = BBNumPreds[BB]; - if (NP == 0) - NP = pred_size(BB) + 1; - return NP - 1; - } - - void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, - const SmallPtrSetImpl<BasicBlock *> &DefBlocks, - SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector &IncVals, - RenamePassData::LocationVector &IncLocs, - std::vector<RenamePassData> &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); -}; - -} // end anonymous namespace - -/// Given a LoadInst LI this adds assume(LI != null) after it. -static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { - Function *AssumeIntrinsic = - Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume); - ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI, - Constant::getNullValue(LI->getType())); - LoadNotNull->insertAfter(LI); - CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); - CI->insertAfter(LoadNotNull); - AC->registerAssumption(CI); -} - -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { - // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. - - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast<Instruction>(*UI); - ++UI; - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - continue; - - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast<Instruction>(*UUI); - ++UUI; - Inst->eraseFromParent(); - } - } - I->eraseFromParent(); - } -} - -/// Rewrite as many loads as possible given a single store. -/// -/// When there is only a single store, we can use the domtree to trivially -/// replace all of the dominated loads with the stored value. Do so, and return -/// true if this has successfully promoted the alloca entirely. If this returns -/// false there were some loads which were not dominated by the single store -/// and thus must be phi-ed with undef. We fall back to the standard alloca -/// promotion algorithm in that case. -static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI, const DataLayout &DL, - DominatorTree &DT, AssumptionCache *AC) { - StoreInst *OnlyStore = Info.OnlyStore; - bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); - BasicBlock *StoreBB = OnlyStore->getParent(); - int StoreIndex = -1; - - // Clear out UsingBlocks. We will reconstruct it here if needed. 
- Info.UsingBlocks.clear(); - - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - Instruction *UserInst = cast<Instruction>(*UI++); - if (UserInst == OnlyStore) - continue; - LoadInst *LI = cast<LoadInst>(UserInst); - - // Okay, if we have a load from the alloca, we want to replace it with the - // only value stored to the alloca. We can do this if the value is - // dominated by the store. If not, we use the rest of the mem2reg machinery - // to insert the phi nodes as needed. - if (!StoringGlobalVal) { // Non-instructions are always dominated. - if (LI->getParent() == StoreBB) { - // If we have a use that is in the same block as the store, compare the - // indices of the two instructions to see which one came first. If the - // load came before the store, we can't handle it. - if (StoreIndex == -1) - StoreIndex = LBI.getInstructionIndex(OnlyStore); - - if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { - // Can't handle this load, bail out. - Info.UsingBlocks.push_back(StoreBB); - continue; - } - } else if (!DT.dominates(StoreBB, LI->getParent())) { - // If the load and store are in different blocks, use BB dominance to - // check their relationships. If the store doesn't dom the use, bail - // out. - Info.UsingBlocks.push_back(LI->getParent()); - continue; - } - } - - // Otherwise, we *can* safely rewrite this load. - Value *ReplVal = OnlyStore->getOperand(0); - // If the replacement value is the load, this must occur in unreachable - // code. - if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); - - // If the load was marked as nonnull we don't want to lose - // that information when we erase this Load. So we preserve - // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) - addAssumeNonNull(AC, LI); - - LI->replaceAllUsesWith(ReplVal); - LI->eraseFromParent(); - LBI.deleteValue(LI); - } - - // Finally, after the scan, check to see if the store is all that is left. - if (!Info.UsingBlocks.empty()) - return false; // If not, we'll have to fall back for the remainder. - - // Record debuginfo for the store and remove the declaration's - // debuginfo. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); - DII->eraseFromParent(); - } - // Remove the (now dead) store and alloca. - Info.OnlyStore->eraseFromParent(); - LBI.deleteValue(Info.OnlyStore); - - AI->eraseFromParent(); - return true; -} - -/// Many allocas are only used within a single basic block. If this is the -/// case, avoid traversing the CFG and inserting a lot of potentially useless -/// PHI nodes by just performing a single linear pass over the basic block -/// using the Alloca. -/// -/// If we cannot promote this alloca (because it is read before it is written), -/// return false. This is necessary in cases where, due to control flow, the -/// alloca is undefined only on some control flow paths. e.g. code like -/// this is correct in LLVM IR: -/// // A is an alloca with no stores so far -/// for (...) { -/// int t = *A; -/// if (!first_iteration) -/// use(t); -/// *A = 42; -/// } -static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, - LargeBlockInfo &LBI, - const DataLayout &DL, - DominatorTree &DT, - AssumptionCache *AC) { - // The trickiest case to handle is when we have large blocks. Because of this, - // this code is optimized assuming that large blocks happen. 
This does not - // significantly pessimize the small block case. This uses LargeBlockInfo to - // make it efficient to get the index of various operations in the block. - - // Walk the use-def list of the alloca, getting the locations of all stores. - using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; - StoresByIndexTy StoresByIndex; - - for (User *U : AI->users()) - if (StoreInst *SI = dyn_cast<StoreInst>(U)) - StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); - - // Sort the stores by their index, making it efficient to do a lookup with a - // binary search. - llvm::sort(StoresByIndex, less_first()); - - // Walk all of the loads from this alloca, replacing them with the nearest - // store above them, if any. - for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { - LoadInst *LI = dyn_cast<LoadInst>(*UI++); - if (!LI) - continue; - - unsigned LoadIdx = LBI.getInstructionIndex(LI); - - // Find the nearest store that has a lower index than this load. - StoresByIndexTy::iterator I = llvm::lower_bound( - StoresByIndex, - std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), - less_first()); - if (I == StoresByIndex.begin()) { - if (StoresByIndex.empty()) - // If there are no stores, the load takes the undef value. - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); - else - // There is no store before this load, bail out (load may be affected - // by the following stores - see main comment). - return false; - } else { - // Otherwise, there was a store before this load, the load takes its value. - // Note, if the load was marked as nonnull we don't want to lose that - // information when we erase it. So we preserve it with an assume. - Value *ReplVal = std::prev(I)->second->getOperand(0); - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) - addAssumeNonNull(AC, LI); - - // If the replacement value is the load, this must occur in unreachable - // code. - if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); - - LI->replaceAllUsesWith(ReplVal); - } - - LI->eraseFromParent(); - LBI.deleteValue(LI); - } - - // Remove the (now dead) stores and alloca. - while (!AI->use_empty()) { - StoreInst *SI = cast<StoreInst>(AI->user_back()); - // Record debuginfo for the store before removing it. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { - DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DII, SI, DIB); - } - SI->eraseFromParent(); - LBI.deleteValue(SI); - } - - AI->eraseFromParent(); - - // The alloca's debuginfo can be removed as well. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) - DII->eraseFromParent(); - - ++NumLocalPromoted; - return true; -} - -void PromoteMem2Reg::run() { - Function &F = *DT.getRoot()->getParent(); - - AllocaDbgDeclares.resize(Allocas.size()); - - AllocaInfo Info; - LargeBlockInfo LBI; - ForwardIDFCalculator IDF(DT); - - for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { - AllocaInst *AI = Allocas[AllocaNum]; - - assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); - assert(AI->getParent()->getParent() == &F && - "All allocas should be in the same function, which is same as DF!"); - - removeLifetimeIntrinsicUsers(AI); - - if (AI->use_empty()) { - // If there are no uses of the alloca, just delete it now. 
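promoteSingleBlockAlloca above answers "which store most recently preceded this load?" with one binary search per load over stores sorted by instruction index. The core lookup, standalone, using std::lower_bound in place of llvm::lower_bound (the indices and values are illustrative):

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <utility>
#include <vector>

int main() {
  // Stores into the alloca, sorted by instruction index within the block.
  std::vector<std::pair<unsigned, int>> StoresByIndex = {
      {2, 10}, {5, 20}, {9, 30}};
  unsigned LoadIdx = 7;
  // Find the first store at or after the load; the one before it (if
  // any) is the nearest earlier store, mirroring less_first() above.
  auto It = std::lower_bound(
      StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, 0),
      [](const auto &A, const auto &B) { return A.first < B.first; });
  if (It == StoresByIndex.begin())
    std::puts("no store before this load");
  else
    std::printf("load at %u sees value %d\n", LoadIdx,
                std::prev(It)->second); // nearest earlier store
}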
- AI->eraseFromParent(); - - // Remove the alloca from the Allocas list, since it has been processed - RemoveFromAllocasList(AllocaNum); - ++NumDeadAlloca; - continue; - } - - // Calculate the set of read and write-locations for each alloca. This is - // analogous to finding the 'uses' and 'definitions' of each variable. - Info.AnalyzeAlloca(AI); - - // If there is only a single store to this value, replace any loads of - // it that are directly dominated by the definition with the value stored. - if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { - // The alloca has been processed, move on. - RemoveFromAllocasList(AllocaNum); - ++NumSingleStore; - continue; - } - } - - // If the alloca is only read and written in one basic block, just perform a - // linear sweep over the block to eliminate it. - if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { - // The alloca has been processed, move on. - RemoveFromAllocasList(AllocaNum); - continue; - } - - // If we haven't computed a numbering for the BB's in the function, do so - // now. - if (BBNumbers.empty()) { - unsigned ID = 0; - for (auto &BB : F) - BBNumbers[&BB] = ID++; - } - - // Remember the dbg.declare intrinsic describing this alloca, if any. - if (!Info.DbgDeclares.empty()) - AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares; - - // Keep the reverse mapping of the 'Allocas' array for the rename pass. - AllocaLookup[Allocas[AllocaNum]] = AllocaNum; - - // At this point, we're committed to promoting the alloca using IDF's, and - // the standard SSA construction algorithm. Determine which blocks need PHI - // nodes and see if we can optimize out some work by avoiding insertion of - // dead phi nodes. - - // Unique the set of defining blocks for efficient lookup. - SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(), - Info.DefiningBlocks.end()); - - // Determine which blocks the value is live in. These are blocks which lead - // to uses. - SmallPtrSet<BasicBlock *, 32> LiveInBlocks; - ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - - // At this point, we're committed to promoting the alloca using IDF's, and - // the standard SSA construction algorithm. Determine which blocks need phi - // nodes and see if we can optimize out some work by avoiding insertion of - // dead phi nodes. - IDF.setLiveInBlocks(LiveInBlocks); - IDF.setDefiningBlocks(DefBlocks); - SmallVector<BasicBlock *, 32> PHIBlocks; - IDF.calculate(PHIBlocks); - llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.find(A)->second < BBNumbers.find(B)->second; - }); - - unsigned CurrentVersion = 0; - for (BasicBlock *BB : PHIBlocks) - QueuePhiNode(BB, AllocaNum, CurrentVersion); - } - - if (Allocas.empty()) - return; // All of the allocas must have been trivial! - - LBI.clear(); - - // Set the incoming values for the basic block to be null values for all of - // the alloca's. We do this in case there is a load of a value that has not - // been stored yet. In this case, it will get this null value. - RenamePassData::ValVector Values(Allocas.size()); - for (unsigned i = 0, e = Allocas.size(); i != e; ++i) - Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); - - // When handling debug info, treat all incoming values as if they have unknown - // locations until proven otherwise. 
- RenamePassData::LocationVector Locations(Allocas.size()); - - // Walks all basic blocks in the function performing the SSA rename algorithm - // and inserting the phi nodes we marked as necessary. - std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), - std::move(Locations)); - do { - RenamePassData RPD = std::move(RenamePassWorkList.back()); - RenamePassWorkList.pop_back(); - // RenamePass may add new worklist entries. - RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList); - } while (!RenamePassWorkList.empty()); - - // The renamer uses the Visited set to avoid infinite loops. Clear it now. - Visited.clear(); - - // Remove the allocas themselves from the function. - for (Instruction *A : Allocas) { - // If there are any uses of the alloca instructions left, they must be in - // unreachable basic blocks that were not processed by walking the dominator - // tree. Just delete the users now. - if (!A->use_empty()) - A->replaceAllUsesWith(UndefValue::get(A->getType())); - A->eraseFromParent(); - } - - // Remove alloca's dbg.declare intrinsics from the function. - for (auto &Declares : AllocaDbgDeclares) - for (auto *DII : Declares) - DII->eraseFromParent(); - - // Loop over all of the PHI nodes and see if there are any that we can get - // rid of because they merge all of the same incoming values. This can - // happen due to undef values coming into the PHI nodes. This process is - // iterative, because eliminating one PHI node can cause others to be removed. - bool EliminatedAPHI = true; - while (EliminatedAPHI) { - EliminatedAPHI = false; - - // Iterating over NewPhiNodes is deterministic, so it is safe to try to - // simplify and RAUW them as we go. If it was not, we could add uses to - // the values we replace with in a non-deterministic order, thus creating - // non-deterministic def->use chains. - for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator - I = NewPhiNodes.begin(), - E = NewPhiNodes.end(); - I != E;) { - PHINode *PN = I->second; - - // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, SQ)) { - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - NewPhiNodes.erase(I++); - EliminatedAPHI = true; - continue; - } - ++I; - } - } - - // At this point, the renamer has added entries to PHI nodes for all reachable - // code. Unfortunately, there may be unreachable blocks which the renamer - // hasn't traversed. If this is the case, the PHI nodes may not - // have incoming values for all predecessors. Loop over all PHI nodes we have - // created, inserting undef values if they are missing any incoming values. - for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator - I = NewPhiNodes.begin(), - E = NewPhiNodes.end(); - I != E; ++I) { - // We want to do this once per basic block. As such, only process a block - // when we find the PHI that is the first entry in the block. - PHINode *SomePHI = I->second; - BasicBlock *BB = SomePHI->getParent(); - if (&BB->front() != SomePHI) - continue; - - // Only do work here if the PHI nodes are missing incoming values. We - // know that all PHI nodes that were inserted in a block will have the same - // number of incoming values, so we can just check any of them. - if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) - continue; - - // Get the preds for BB.
- SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
-
- // Ok, now we know that all of the PHI nodes are missing entries for some
- // basic blocks. Start by sorting the incoming predecessors for efficient
- // access.
- auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
- };
- llvm::sort(Preds, CompareBBNumbers);
-
- // Now we loop through all BB's which have entries in SomePHI and remove
- // them from the Preds list.
- for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
- // Do a log(n) search of the Preds list for the entry we want.
- SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
- Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
- assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
- "PHI node has entry for a block which is not a predecessor!");
-
- // Remove the entry
- Preds.erase(EntIt);
- }
-
- // At this point, the blocks left in the preds list must have dummy
- // entries inserted into every PHI node in the block. Update all the phi
- // nodes in this block that we are inserting (there could be phis before
- // mem2reg runs).
- unsigned NumBadPreds = SomePHI->getNumIncomingValues();
- BasicBlock::iterator BBI = BB->begin();
- while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
- SomePHI->getNumIncomingValues() == NumBadPreds) {
- Value *UndefVal = UndefValue::get(SomePHI->getType());
- for (BasicBlock *Pred : Preds)
- SomePHI->addIncoming(UndefVal, Pred);
- }
- }
-
- NewPhiNodes.clear();
-}
-
-/// Determine which blocks the value is live in.
-///
-/// These are blocks which lead to uses. Knowing this allows us to avoid
-/// inserting PHI nodes into blocks which don't lead to uses (thus, the
-/// inserted phi nodes would be dead).
-void PromoteMem2Reg::ComputeLiveInBlocks(
- AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
- SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
- // To determine liveness, we must iterate through the predecessors of blocks
- // where the def is live. Blocks are added to the worklist if we need to
- // check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
- Info.UsingBlocks.end());
-
- // If any of the using blocks is also a definition block, check to see if the
- // definition occurs before or after the use. If it happens before the use,
- // the value isn't really live-in.
- for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
- BasicBlock *BB = LiveInBlockWorklist[i];
- if (!DefBlocks.count(BB))
- continue;
-
- // Okay, this is a block that both uses and defines the value. If the first
- // reference to the alloca is a def (store), then we know it isn't live-in.
- for (BasicBlock::iterator I = BB->begin();; ++I) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (SI->getOperand(1) != AI)
- continue;
-
- // We found a store to the alloca before a load. The alloca is not
- // actually live-in here.
- LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
- LiveInBlockWorklist.pop_back();
- --i;
- --e;
- break;
- }
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- // Okay, we found a load before a store to the alloca. It is actually
- // live into this block.
- if (LI->getOperand(0) == AI)
- break;
- }
- }
-
- // Now that we have a set of blocks where the phi is live-in, recursively add
- // their predecessors until we find the full region in which the value is live.
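- // For example, in a CFG A -> B -> C with the only store in A and a load
- // in C, the walk marks C and then B as live-in and stops at the defining
- // block A.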
- while (!LiveInBlockWorklist.empty()) {
- BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
- // The block really is live in here, insert it into the set. If already in
- // the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB).second)
- continue;
-
- // Since the value is live into BB, it is either defined in a predecessor or
- // live into it too. Add the preds to the worklist unless they are a
- // defining block.
- for (BasicBlock *P : predecessors(BB)) {
- // The value is not live into a predecessor if it defines the value.
- if (DefBlocks.count(P))
- continue;
-
- // Otherwise it is, add to the worklist.
- LiveInBlockWorklist.push_back(P);
- }
- }
-}
-
-/// Queue a phi-node to be added to a basic-block for a specific Alloca.
-///
-/// Returns true if there wasn't already a phi-node for that variable
-bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
- unsigned &Version) {
- // Look up the basic-block in question.
- PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
-
- // If the BB already has a phi node added for the i'th alloca then we're done!
- if (PN)
- return false;
-
- // Create a PhiNode using the dereferenced type... and add the phi-node to the
- // BasicBlock.
- PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
- Allocas[AllocaNo]->getName() + "." + Twine(Version++),
- &BB->front());
- ++NumPHIInsert;
- PhiToAllocaMap[PN] = AllocaNo;
- return true;
-}
-
-/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
-/// create a merged location incorporating \p DL, or to set \p DL directly.
-static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
- bool ApplyMergedLoc) {
- if (ApplyMergedLoc)
- PN->applyMergedLocation(PN->getDebugLoc(), DL);
- else
- PN->setDebugLoc(DL);
-}
-
-/// Recursively traverse the CFG of the function, renaming loads and
-/// stores to the allocas which we are promoting.
-///
-/// IncomingVals indicates what value each Alloca contains on exit from the
-/// predecessor block Pred.
-void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector &IncomingVals,
- RenamePassData::LocationVector &IncomingLocs,
- std::vector<RenamePassData> &Worklist) {
-NextIteration:
- // If we are inserting any phi nodes into this BB, they will already be in the
- // block.
- if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
- // If we have PHI nodes to update, compute the number of edges from Pred to
- // BB.
- if (PhiToAllocaMap.count(APN)) {
- // We want to be able to distinguish PHI nodes being inserted by
- // this invocation of mem2reg from those phi nodes that already existed in
- // the IR before mem2reg was run. We determine that APN is being inserted
- // because it is missing incoming edges. All other PHI nodes being
- // inserted by this pass of mem2reg will have the same number of incoming
- // operands so far. Remember this count.
- unsigned NewPHINumOperands = APN->getNumOperands();
-
- unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
- assert(NumEdges && "Must be at least one edge from Pred to BB!");
-
- // Add entries for all the phis.
- BasicBlock::iterator PNI = BB->begin();
- do {
- unsigned AllocaNo = PhiToAllocaMap[APN];
-
- // Update the location of the phi node.
- updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
- APN->getNumIncomingValues() > 0);
-
- // Add N incoming values to the PHI node.
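- // (One entry per edge: if Pred reaches BB through several switch cases,
- // the PHI needs a separate entry for each edge, all with the same value.)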
- for (unsigned i = 0; i != NumEdges; ++i) - APN->addIncoming(IncomingVals[AllocaNo], Pred); - - // The currently active variable for this block is now the PHI. - IncomingVals[AllocaNo] = APN; - for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo]) - ConvertDebugDeclareToDebugValue(DII, APN, DIB); - - // Get the next phi node. - ++PNI; - APN = dyn_cast<PHINode>(PNI); - if (!APN) - break; - - // Verify that it is missing entries. If not, it is not being inserted - // by this mem2reg invocation so we want to ignore it. - } while (APN->getNumOperands() == NewPHINumOperands); - } - } - - // Don't revisit blocks. - if (!Visited.insert(BB).second) - return; - - for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) { - Instruction *I = &*II++; // get the instruction, increment iterator - - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); - if (!Src) - continue; - - DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); - if (AI == AllocaLookup.end()) - continue; - - Value *V = IncomingVals[AI->second]; - - // If the load was marked as nonnull we don't want to lose - // that information when we erase this Load. So we preserve - // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT)) - addAssumeNonNull(AC, LI); - - // Anything using the load now uses the current value. - LI->replaceAllUsesWith(V); - BB->getInstList().erase(LI); - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Delete this instruction and mark the name as the current holder of the - // value - AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); - if (!Dest) - continue; - - DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); - if (ai == AllocaLookup.end()) - continue; - - // what value were we writing? - unsigned AllocaNo = ai->second; - IncomingVals[AllocaNo] = SI->getOperand(0); - - // Record debuginfo for the store before removing it. - IncomingLocs[AllocaNo] = SI->getDebugLoc(); - for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DII, SI, DIB); - BB->getInstList().erase(SI); - } - } - - // 'Recurse' to our successors. - succ_iterator I = succ_begin(BB), E = succ_end(BB); - if (I == E) - return; - - // Keep track of the successors so we don't visit the same successor twice - SmallPtrSet<BasicBlock *, 8> VisitedSuccs; - - // Handle the first successor without using the worklist. - VisitedSuccs.insert(*I); - Pred = BB; - BB = *I; - ++I; - - for (; I != E; ++I) - if (VisitedSuccs.insert(*I).second) - Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs); - - goto NextIteration; -} - -void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AssumptionCache *AC) { - // If there is nothing to do, bail out... - if (Allocas.empty()) - return; - - PromoteMem2Reg(Allocas, DT, AC).run(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp deleted file mode 100644 index bffdd115d940..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ /dev/null @@ -1,495 +0,0 @@ -//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the SSAUpdater class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" -#include <cassert> -#include <utility> - -using namespace llvm; - -#define DEBUG_TYPE "ssaupdater" - -using AvailableValsTy = DenseMap<BasicBlock *, Value *>; - -static AvailableValsTy &getAvailableVals(void *AV) { - return *static_cast<AvailableValsTy*>(AV); -} - -SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI) - : InsertedPHIs(NewPHI) {} - -SSAUpdater::~SSAUpdater() { - delete static_cast<AvailableValsTy*>(AV); -} - -void SSAUpdater::Initialize(Type *Ty, StringRef Name) { - if (!AV) - AV = new AvailableValsTy(); - else - getAvailableVals(AV).clear(); - ProtoType = Ty; - ProtoName = Name; -} - -bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { - return getAvailableVals(AV).count(BB); -} - -Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { - AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB); - return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr; -} - -void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(ProtoType && "Need to initialize SSAUpdater"); - assert(ProtoType == V->getType() && - "All rewritten values must have the same type"); - getAvailableVals(AV)[BB] = V; -} - -static bool IsEquivalentPHI(PHINode *PHI, - SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) { - unsigned PHINumValues = PHI->getNumIncomingValues(); - if (PHINumValues != ValueMapping.size()) - return false; - - // Scan the phi to see if it matches. - for (unsigned i = 0, e = PHINumValues; i != e; ++i) - if (ValueMapping[PHI->getIncomingBlock(i)] != - PHI->getIncomingValue(i)) { - return false; - } - - return true; -} - -Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - Value *Res = GetValueAtEndOfBlockInternal(BB); - return Res; -} - -Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { - // If there is no definition of the renamed variable in this block, just use - // GetValueAtEndOfBlock to do our work. - if (!HasValueForBlock(BB)) - return GetValueAtEndOfBlock(BB); - - // Otherwise, we have the hard case. Get the live-in values for each - // predecessor. - SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues; - Value *SingularValue = nullptr; - - // We can get our predecessor info by walking the pred_iterator list, but it - // is relatively slow. If we already have PHI nodes in this block, walk one - // of them to get the predecessor list instead. 
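- // (For reference, an illustrative client of this class looks like:
- //   SSAUpdater SSA;
- //   SSA.Initialize(Ty, "x");
- //   SSA.AddAvailableValue(BB1, V1);
- //   SSA.AddAvailableValue(BB2, V2);
- //   Value *V = SSA.GetValueInMiddleOfBlock(MergeBB);
- // where Ty, BB1, BB2, V1, V2 and MergeBB are caller-provided. This is
- // the call that may need to insert a PHI, as follows.)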
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
- Value *PredVal = GetValueAtEndOfBlock(PredBB);
- PredValues.push_back(std::make_pair(PredBB, PredVal));
-
- // Compute SingularValue.
- if (i == 0)
- SingularValue = PredVal;
- else if (PredVal != SingularValue)
- SingularValue = nullptr;
- }
- } else {
- bool isFirstPred = true;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBB = *PI;
- Value *PredVal = GetValueAtEndOfBlock(PredBB);
- PredValues.push_back(std::make_pair(PredBB, PredVal));
-
- // Compute SingularValue.
- if (isFirstPred) {
- SingularValue = PredVal;
- isFirstPred = false;
- } else if (PredVal != SingularValue)
- SingularValue = nullptr;
- }
- }
-
- // If there are no predecessors, just return undef.
- if (PredValues.empty())
- return UndefValue::get(ProtoType);
-
- // Otherwise, if all the merged values are the same, just use it.
- if (SingularValue)
- return SingularValue;
-
- // Otherwise, we do need a PHI: check to see if we already have one available
- // in this block that produces the right value.
- if (isa<PHINode>(BB->begin())) {
- SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),
- PredValues.end());
- for (PHINode &SomePHI : BB->phis()) {
- if (IsEquivalentPHI(&SomePHI, ValueMapping))
- return &SomePHI;
- }
- }
-
- // Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
- ProtoName, &BB->front());
-
- // Fill in all the predecessors of the PHI.
- for (const auto &PredValue : PredValues)
- InsertedPHI->addIncoming(PredValue.second, PredValue.first);
-
- // See if the PHI node can be merged to a single value. This can happen in
- // loop cases when we get a PHI of itself and one other value.
- if (Value *V =
- SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
- InsertedPHI->eraseFromParent();
- return V;
- }
-
- // Set the DebugLoc of the inserted PHI, if available.
- DebugLoc DL;
- if (const Instruction *I = BB->getFirstNonPHI())
- DL = I->getDebugLoc();
- InsertedPHI->setDebugLoc(DL);
-
- // If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
-
- LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
- return InsertedPHI;
-}
-
-void SSAUpdater::RewriteUse(Use &U) {
- Instruction *User = cast<Instruction>(U.getUser());
-
- Value *V;
- if (PHINode *UserPN = dyn_cast<PHINode>(User))
- V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
- else
- V = GetValueInMiddleOfBlock(User->getParent());
-
- // Notify users of the existing value that it is being replaced.
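- // (ValueHandleBase::ValueIsRAUWd fires any value handles watching the old
- // value, e.g. WeakVH or CallbackVH clients tracking the replacement.)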
- Value *OldVal = U.get(); - if (OldVal != V && OldVal->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(OldVal, V); - - U.set(V); -} - -void SSAUpdater::RewriteUseAfterInsertions(Use &U) { - Instruction *User = cast<Instruction>(U.getUser()); - - Value *V; - if (PHINode *UserPN = dyn_cast<PHINode>(User)) - V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); - else - V = GetValueAtEndOfBlock(User->getParent()); - - U.set(V); -} - -namespace llvm { - -template<> -class SSAUpdaterTraits<SSAUpdater> { -public: - using BlkT = BasicBlock; - using ValT = Value *; - using PhiT = PHINode; - using BlkSucc_iterator = succ_iterator; - - static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } - static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } - - class PHI_iterator { - private: - PHINode *PHI; - unsigned idx; - - public: - explicit PHI_iterator(PHINode *P) // begin iterator - : PHI(P), idx(0) {} - PHI_iterator(PHINode *P, bool) // end iterator - : PHI(P), idx(PHI->getNumIncomingValues()) {} - - PHI_iterator &operator++() { ++idx; return *this; } - bool operator==(const PHI_iterator& x) const { return idx == x.idx; } - bool operator!=(const PHI_iterator& x) const { return !operator==(x); } - - Value *getIncomingValue() { return PHI->getIncomingValue(idx); } - BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } - }; - - static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } - static PHI_iterator PHI_end(PhiT *PHI) { - return PHI_iterator(PHI, true); - } - - /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds - /// vector, set Info->NumPreds, and allocate space in Info->Preds. - static void FindPredecessorBlocks(BasicBlock *BB, - SmallVectorImpl<BasicBlock *> *Preds) { - // We can get our predecessor info by walking the pred_iterator list, - // but it is relatively slow. If we already have PHI nodes in this - // block, walk one of them to get the predecessor list instead. - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - Preds->append(SomePhi->block_begin(), SomePhi->block_end()); - } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Preds->push_back(*PI); - } - } - - /// GetUndefVal - Get an undefined value of the same type as the value - /// being handled. - static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { - return UndefValue::get(Updater->ProtoType); - } - - /// CreateEmptyPHI - Create a new PHI instruction in the specified block. - /// Reserve space for the operands but do not fill them in yet. - static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, - SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, - Updater->ProtoName, &BB->front()); - return PHI; - } - - /// AddPHIOperand - Add the specified value as an operand of the PHI for - /// the specified predecessor block. - static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { - PHI->addIncoming(Val, Pred); - } - - /// InstrIsPHI - Check if an instruction is a PHI. - /// - static PHINode *InstrIsPHI(Instruction *I) { - return dyn_cast<PHINode>(I); - } - - /// ValueIsPHI - Check if a value is a PHI. - static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { - return dyn_cast<PHINode>(Val); - } - - /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source - /// operands, i.e., it was just added. 
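- /// (These are the empty PHIs created by CreateEmptyPHI above: SSAUpdaterImpl
- /// fills in their incoming operands only after computing the values for all
- /// predecessors.)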
- static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
- PHINode *PHI = ValueIsPHI(Val, Updater);
- if (PHI && PHI->getNumIncomingValues() == 0)
- return PHI;
- return nullptr;
- }
-
- /// GetPHIValue - For the specified PHI instruction, return the value
- /// that it defines.
- static Value *GetPHIValue(PHINode *PHI) {
- return PHI;
- }
-};
-
-} // end namespace llvm
-
-/// Check to see if AvailableVals has an entry for the specified BB and if so,
-/// return it. If not, construct SSA form by first calculating the required
-/// placement of PHIs and then inserting new PHIs where needed.
-Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (Value *V = AvailableVals[BB])
- return V;
-
- SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
- return Impl.GetValue(BB);
-}
-
-//===----------------------------------------------------------------------===//
-// LoadAndStorePromoter Implementation
-//===----------------------------------------------------------------------===//
-
-LoadAndStorePromoter::
-LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
- SSAUpdater &S, StringRef BaseName) : SSA(S) {
- if (Insts.empty()) return;
-
- const Value *SomeVal;
- if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
- SomeVal = LI;
- else
- SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
-
- if (BaseName.empty())
- BaseName = SomeVal->getName();
- SSA.Initialize(SomeVal->getType(), BaseName);
-}
-
-void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
- // First step: bucket up uses of the alloca by the block they occur in.
- // This is important because we have to handle multiple defs/uses in a block
- // ourselves: SSAUpdater is purely for cross-block references.
- DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock;
-
- for (Instruction *User : Insts)
- UsesByBlock[User->getParent()].push_back(User);
-
- // Okay, now we can iterate over all the blocks in the function with uses,
- // processing them. Keep track of which loads are loading a live-in value.
- // Walk the uses in the use-list order to be deterministic.
- SmallVector<LoadInst *, 32> LiveInLoads;
- DenseMap<Value *, Value *> ReplacedLoads;
-
- for (Instruction *User : Insts) {
- BasicBlock *BB = User->getParent();
- TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB];
-
- // If this block has already been processed, ignore this repeat use.
- if (BlockUses.empty()) continue;
-
- // Okay, this is the first use in the block. If this block just has a
- // single user in it, we can rewrite it trivially.
- if (BlockUses.size() == 1) {
- // If it is a store, it is a trivial def of the value in the block.
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- updateDebugInfo(SI);
- SSA.AddAvailableValue(BB, SI->getOperand(0));
- } else
- // Otherwise it is a load, queue it to rewrite as a live-in load.
- LiveInLoads.push_back(cast<LoadInst>(User));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, check to see if this block is all loads.
- bool HasStore = false;
- for (Instruction *I : BlockUses) {
- if (isa<StoreInst>(I)) {
- HasStore = true;
- break;
- }
- }
-
- // If so, we can queue them all as live-in loads. We don't have an
- // efficient way to tell which one is first in the block and don't want to
- // scan large blocks, so just add all loads as live-ins.
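- // (This is conservative but safe: with no store in the block, every such
- // load observes whatever value is live into the block.)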
- if (!HasStore) {
- for (Instruction *I : BlockUses)
- LiveInLoads.push_back(cast<LoadInst>(I));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, we have mixed loads and stores (or just a bunch of stores).
- // Since SSAUpdater is purely for cross-block values, we need to determine
- // the order of these instructions in the block. If the first use in the
- // block is a load, then it uses the live-in value. The last store defines
- // the live-out value. We handle this by doing a linear scan of the block.
- Value *StoredValue = nullptr;
- for (Instruction &I : *BB) {
- if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
- // If this is a load from an unrelated pointer, ignore it.
- if (!isInstInList(L, Insts)) continue;
-
- // If we haven't seen a store yet, this is a live-in use, otherwise
- // use the stored value.
- if (StoredValue) {
- replaceLoadWithValue(L, StoredValue);
- L->replaceAllUsesWith(StoredValue);
- ReplacedLoads[L] = StoredValue;
- } else {
- LiveInLoads.push_back(L);
- }
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- // If this is a store to an unrelated pointer, ignore it.
- if (!isInstInList(SI, Insts)) continue;
- updateDebugInfo(SI);
-
- // Remember that this is the active value in the block.
- StoredValue = SI->getOperand(0);
- }
- }
-
- // The last value stored is the live-out for the block.
- assert(StoredValue && "Already checked that there is a store in block");
- SSA.AddAvailableValue(BB, StoredValue);
- BlockUses.clear();
- }
-
- // Okay, now we rewrite all loads that use live-in values in the loop,
- // inserting PHI nodes as necessary.
- for (LoadInst *ALoad : LiveInLoads) {
- Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
- replaceLoadWithValue(ALoad, NewVal);
-
- // Avoid assertions in unreachable code.
- if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
- ALoad->replaceAllUsesWith(NewVal);
- ReplacedLoads[ALoad] = NewVal;
- }
-
- // Allow the client to do stuff before we start nuking things.
- doExtraRewritesBeforeFinalDeletion();
-
- // Now that everything is rewritten, delete the old instructions from the
- // function. They should all be dead now.
- for (Instruction *User : Insts) {
- // If this is a load that still has uses, then the load must have been added
- // as a live value in the SSAUpdater data structure for a block (e.g. because
- // the loaded value was stored later). In this case, we need to recursively
- // propagate the updates until we get to the real value.
- if (!User->use_empty()) {
- Value *NewVal = ReplacedLoads[User];
- assert(NewVal && "not a replaced load?");
-
- // Propagate down to the ultimate replacee. The intermediate loads
- // could theoretically already have been deleted, so we don't want to
- // dereference the Value*'s.
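- // For example, if L1 was replaced by L2 earlier in this run, and L2 was
- // in turn replaced by %v, this walk resolves L1 all the way to %v.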
- DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); - while (RLI != ReplacedLoads.end()) { - NewVal = RLI->second; - RLI = ReplacedLoads.find(NewVal); - } - - replaceLoadWithValue(cast<LoadInst>(User), NewVal); - User->replaceAllUsesWith(NewVal); - } - - instructionDeleted(User); - User->eraseFromParent(); - } -} - -bool -LoadAndStorePromoter::isInstInList(Instruction *I, - const SmallVectorImpl<Instruction *> &Insts) - const { - return is_contained(Insts, I); -} diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp deleted file mode 100644 index 917d5e0a1ef0..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ /dev/null @@ -1,190 +0,0 @@ -//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the SSAUpdaterBulk class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" -#include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/Value.h" - -using namespace llvm; - -#define DEBUG_TYPE "ssaupdaterbulk" - -/// Helper function for finding a block which should have a value for the given -/// user. For PHI-nodes this block is the corresponding predecessor, for other -/// instructions it's their parent block. -static BasicBlock *getUserBB(Use *U) { - auto *User = cast<Instruction>(U->getUser()); - - if (auto *UserPN = dyn_cast<PHINode>(User)) - return UserPN->getIncomingBlock(*U); - else - return User->getParent(); -} - -/// Add a new variable to the SSA rewriter. This needs to be called before -/// AddAvailableValue or AddUse calls. -unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) { - unsigned Var = Rewrites.size(); - LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = " - << *Ty << ", Name = " << Name << "\n"); - RewriteInfo RI(Name, Ty); - Rewrites.push_back(RI); - return Var; -} - -/// Indicate that a rewritten value is available in the specified block with the -/// specified value. -void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) { - assert(Var < Rewrites.size() && "Variable not found!"); - LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var - << ": added new available value" << *V << " in " - << BB->getName() << "\n"); - Rewrites[Var].Defines[BB] = V; -} - -/// Record a use of the symbolic value. This use will be updated with a -/// rewritten value when RewriteAllUses is called. -void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) { - assert(Var < Rewrites.size() && "Variable not found!"); - LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use" << *U->get() - << " in " << getUserBB(U)->getName() << "\n"); - Rewrites[Var].Uses.push_back(U); -} - -/// Return true if the SSAUpdater already has a value for the specified variable -/// in the specified block. -bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) { - return (Var < Rewrites.size()) ? 
Rewrites[Var].Defines.count(BB) : false;
-}
-
-// Compute the value at the given block BB. We should either already know it, or
-// be able to reach it recursively by going up the dominator tree.
-Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
- DominatorTree *DT) {
- if (!R.Defines.count(BB)) {
- if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) {
- BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock();
- Value *V = computeValueAt(IDom, R, DT);
- R.Defines[BB] = V;
- } else
- R.Defines[BB] = UndefValue::get(R.Ty);
- }
- return R.Defines[BB];
-}
-
-/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
-/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
-static void
-ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks,
- const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
- SmallPtrSetImpl<BasicBlock *> &LiveInBlocks,
- PredIteratorCache &PredCache) {
- // To determine liveness, we must iterate through the predecessors of blocks
- // where the def is live. Blocks are added to the worklist if we need to
- // check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
- UsingBlocks.end());
-
- // Now that we have a set of blocks where the phi is live-in, recursively add
- // their predecessors until we find the full region in which the value is live.
- while (!LiveInBlockWorklist.empty()) {
- BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
- // The block really is live in here, insert it into the set. If already in
- // the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB).second)
- continue;
-
- // Since the value is live into BB, it is either defined in a predecessor or
- // live into it too. Add the preds to the worklist unless they are a
- // defining block.
- for (BasicBlock *P : PredCache.get(BB)) {
- // The value is not live into a predecessor if it defines the value.
- if (DefBlocks.count(P))
- continue;
-
- // Otherwise it is, add to the worklist.
- LiveInBlockWorklist.push_back(P);
- }
- }
-}
-
-/// Perform all the necessary updates: insert new PHI nodes and rewrite the
-/// requested uses.
-void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
- SmallVectorImpl<PHINode *> *InsertedPHIs) {
- for (auto &R : Rewrites) {
- // Compute locations for new phi-nodes.
- // For that we need to initialize DefBlocks from definitions in R.Defines,
- // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use
- // this set for computing iterated dominance frontier (IDF).
- // The IDF blocks are the blocks where we need to insert new phi-nodes.
- ForwardIDFCalculator IDF(*DT);
- LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size()
- << " use(s)\n");
-
- SmallPtrSet<BasicBlock *, 2> DefBlocks;
- for (auto &Def : R.Defines)
- DefBlocks.insert(Def.first);
- IDF.setDefiningBlocks(DefBlocks);
-
- SmallPtrSet<BasicBlock *, 2> UsingBlocks;
- for (Use *U : R.Uses)
- UsingBlocks.insert(getUserBB(U));
-
- SmallVector<BasicBlock *, 32> IDFBlocks;
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache);
- IDF.resetLiveInBlocks();
- IDF.setLiveInBlocks(LiveInBlocks);
- IDF.calculate(IDFBlocks);
-
- // We've computed IDF, now insert new phi-nodes there.
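- // (Illustrative client pattern for this class, where Ty, DefBB, DefV, U
- // and DT stand in for caller-provided objects:
- //   SSAUpdaterBulk Updater;
- //   unsigned X = Updater.AddVariable("x", Ty);
- //   Updater.AddAvailableValue(X, DefBB, DefV);
- //   Updater.AddUse(X, &U);
- //   Updater.RewriteAllUses(&DT);
- // All of the PHI insertion then happens in the loop below.)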
- SmallVector<PHINode *, 4> InsertedPHIsForVar; - for (auto *FrontierBB : IDFBlocks) { - IRBuilder<> B(FrontierBB, FrontierBB->begin()); - PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); - R.Defines[FrontierBB] = PN; - InsertedPHIsForVar.push_back(PN); - if (InsertedPHIs) - InsertedPHIs->push_back(PN); - } - - // Fill in arguments of the inserted PHIs. - for (auto *PN : InsertedPHIsForVar) { - BasicBlock *PBB = PN->getParent(); - for (BasicBlock *Pred : PredCache.get(PBB)) - PN->addIncoming(computeValueAt(Pred, R, DT), Pred); - } - - // Rewrite actual uses with the inserted definitions. - SmallPtrSet<Use *, 4> ProcessedUses; - for (Use *U : R.Uses) { - if (!ProcessedUses.insert(U).second) - continue; - Value *V = computeValueAt(getUserBB(U), R, DT); - Value *OldVal = U->get(); - assert(OldVal && "Invalid use!"); - // Notify that users of the existing value that it is being replaced. - if (OldVal != V && OldVal->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(OldVal, V); - LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V - << "\n"); - U->set(V); - } - } -} diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp deleted file mode 100644 index a1313c77ed77..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Implements code generation for sanitizer statistics gathering. 
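-// Each instrumented site reports through __sanitizer_stat_report into a
-// per-module table that a global constructor hands to __sanitizer_stat_init.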
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" - -using namespace llvm; - -SanitizerStatReport::SanitizerStatReport(Module *M) : M(M) { - StatTy = ArrayType::get(Type::getInt8PtrTy(M->getContext()), 2); - EmptyModuleStatsTy = makeModuleStatsTy(); - - ModuleStatsGV = new GlobalVariable(*M, EmptyModuleStatsTy, false, - GlobalValue::InternalLinkage, nullptr); -} - -ArrayType *SanitizerStatReport::makeModuleStatsArrayTy() { - return ArrayType::get(StatTy, Inits.size()); -} - -StructType *SanitizerStatReport::makeModuleStatsTy() { - return StructType::get(M->getContext(), {Type::getInt8PtrTy(M->getContext()), - Type::getInt32Ty(M->getContext()), - makeModuleStatsArrayTy()}); -} - -void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { - Function *F = B.GetInsertBlock()->getParent(); - Module *M = F->getParent(); - PointerType *Int8PtrTy = B.getInt8PtrTy(); - IntegerType *IntPtrTy = B.getIntPtrTy(M->getDataLayout()); - ArrayType *StatTy = ArrayType::get(Int8PtrTy, 2); - - Inits.push_back(ConstantArray::get( - StatTy, - {Constant::getNullValue(Int8PtrTy), - ConstantExpr::getIntToPtr( - ConstantInt::get(IntPtrTy, uint64_t(SK) << (IntPtrTy->getBitWidth() - - kSanitizerStatKindBits)), - Int8PtrTy)})); - - FunctionType *StatReportTy = - FunctionType::get(B.getVoidTy(), Int8PtrTy, false); - FunctionCallee StatReport = - M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy); - - auto InitAddr = ConstantExpr::getGetElementPtr( - EmptyModuleStatsTy, ModuleStatsGV, - ArrayRef<Constant *>{ - ConstantInt::get(IntPtrTy, 0), ConstantInt::get(B.getInt32Ty(), 2), - ConstantInt::get(IntPtrTy, Inits.size() - 1), - }); - B.CreateCall(StatReport, ConstantExpr::getBitCast(InitAddr, Int8PtrTy)); -} - -void SanitizerStatReport::finish() { - if (Inits.empty()) { - ModuleStatsGV->eraseFromParent(); - return; - } - - PointerType *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); - IntegerType *Int32Ty = Type::getInt32Ty(M->getContext()); - Type *VoidTy = Type::getVoidTy(M->getContext()); - - // Create a new ModuleStatsGV to replace the old one. We can't just set the - // old one's initializer because its type is different. - auto NewModuleStatsGV = new GlobalVariable( - *M, makeModuleStatsTy(), false, GlobalValue::InternalLinkage, - ConstantStruct::getAnon( - {Constant::getNullValue(Int8PtrTy), - ConstantInt::get(Int32Ty, Inits.size()), - ConstantArray::get(makeModuleStatsArrayTy(), Inits)})); - ModuleStatsGV->replaceAllUsesWith( - ConstantExpr::getBitCast(NewModuleStatsGV, ModuleStatsGV->getType())); - ModuleStatsGV->eraseFromParent(); - - // Create a global constructor to register NewModuleStatsGV. 
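- // (appendToGlobalCtors below arranges for this constructor to run at
- // program startup, handing the module's stats table to the runtime.)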
- auto F = Function::Create(FunctionType::get(VoidTy, false), - GlobalValue::InternalLinkage, "", M); - auto BB = BasicBlock::Create(M->getContext(), "", F); - IRBuilder<> B(BB); - - FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false); - FunctionCallee StatInit = - M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy); - - B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); - B.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); -} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp deleted file mode 100644 index 6e2ef67408d9..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ /dev/null @@ -1,6081 +0,0 @@ -//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Peephole optimize the CFG. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/ConstantRange.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/NoFolder.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <climits> -#include <cstddef> -#include <cstdint> -#include <iterator> -#include <map> -#include <set> -#include <tuple> -#include <utility> -#include <vector> - -using namespace llvm; -using namespace 
PatternMatch; - -#define DEBUG_TYPE "simplifycfg" - -// Chosen as 2 so as to be cheap, but still to have enough power to fold -// a select, so the "clamp" idiom (of a min followed by a max) will be caught. -// To catch this, we need to fold a compare and a select, hence '2' being the -// minimum reasonable default. -static cl::opt<unsigned> PHINodeFoldingThreshold( - "phi-node-folding-threshold", cl::Hidden, cl::init(2), - cl::desc( - "Control the amount of phi node folding to perform (default = 2)")); - -static cl::opt<bool> DupRet( - "simplifycfg-dup-ret", cl::Hidden, cl::init(false), - cl::desc("Duplicate return instructions into unconditional branches")); - -static cl::opt<bool> - SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), - cl::desc("Sink common instructions down to the end block")); - -static cl::opt<bool> HoistCondStores( - "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), - cl::desc("Hoist conditional stores if an unconditional store precedes")); - -static cl::opt<bool> MergeCondStores( - "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), - cl::desc("Hoist conditional stores even if an unconditional store does not " - "precede - hoist multiple conditional stores into a single " - "predicated store")); - -static cl::opt<bool> MergeCondStoresAggressively( - "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), - cl::desc("When merging conditional stores, do so even if the resultant " - "basic blocks are unlikely to be if-converted as a result")); - -static cl::opt<bool> SpeculateOneExpensiveInst( - "speculate-one-expensive-inst", cl::Hidden, cl::init(true), - cl::desc("Allow exactly one expensive instruction to be speculatively " - "executed")); - -static cl::opt<unsigned> MaxSpeculationDepth( - "max-speculation-depth", cl::Hidden, cl::init(10), - cl::desc("Limit maximum recursion depth when calculating costs of " - "speculatively executed instructions")); - -STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); -STATISTIC(NumLinearMaps, - "Number of switch instructions turned into linear mapping"); -STATISTIC(NumLookupTables, - "Number of switch instructions turned into lookup tables"); -STATISTIC( - NumLookupTablesHoles, - "Number of switch instructions turned into lookup tables (holes checked)"); -STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); -STATISTIC(NumSinkCommons, - "Number of common instructions sunk down to the end block"); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); - -namespace { - -// The first field contains the value that the switch produces when a certain -// case group is selected, and the second field is a vector containing the -// cases composing the case group. -using SwitchCaseResultVectorTy = - SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>; - -// The first field contains the phi node that generates a result of the switch -// and the second field contains the value generated for a certain case in the -// switch for that PHI. -using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; - -/// ValueEqualityComparisonCase - Represents a case of a switch. -struct ValueEqualityComparisonCase { - ConstantInt *Value; - BasicBlock *Dest; - - ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) - : Value(Value), Dest(Dest) {} - - bool operator<(ValueEqualityComparisonCase RHS) const { - // Comparing pointers is ok as we only rely on the order for uniquing. 
- return Value < RHS.Value; - } - - bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } -}; - -class SimplifyCFGOpt { - const TargetTransformInfo &TTI; - const DataLayout &DL; - SmallPtrSetImpl<BasicBlock *> *LoopHeaders; - const SimplifyCFGOptions &Options; - bool Resimplify; - - Value *isValueEqualityComparison(Instruction *TI); - BasicBlock *GetValueEqualityComparisonCases( - Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases); - bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, - BasicBlock *Pred, - IRBuilder<> &Builder); - bool FoldValueComparisonIntoPredecessors(Instruction *TI, - IRBuilder<> &Builder); - - bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); - bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); - bool SimplifySingleResume(ResumeInst *RI); - bool SimplifyCommonResume(ResumeInst *RI); - bool SimplifyCleanupReturn(CleanupReturnInst *RI); - bool SimplifyUnreachable(UnreachableInst *UI); - bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); - bool SimplifyIndirectBr(IndirectBrInst *IBI); - bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); - bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); - - bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, - IRBuilder<> &Builder); - -public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders, - const SimplifyCFGOptions &Opts) - : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} - - bool run(BasicBlock *BB); - bool simplifyOnce(BasicBlock *BB); - - // Helper to set Resimplify and return change indication. - bool requestResimplify() { - Resimplify = true; - return true; - } -}; - -} // end anonymous namespace - -/// Return true if it is safe to merge these two -/// terminator instructions together. -static bool -SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, - SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { - if (SI1 == SI2) - return false; // Can't merge with self! - - // It is not safe to merge these two switch instructions if they have a common - // successor, and if that successor has a PHI node, and if *that* PHI node has - // conflicting incoming values from the two switch blocks. - BasicBlock *SI1BB = SI1->getParent(); - BasicBlock *SI2BB = SI2->getParent(); - - SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - bool Fail = false; - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != - PN->getIncomingValueForBlock(SI2BB)) { - if (FailBlocks) - FailBlocks->insert(Succ); - Fail = true; - } - } - - return !Fail; -} - -/// Return true if it is safe and profitable to merge these two terminator -/// instructions together, where SI1 is an unconditional branch. PhiNodes will -/// store all PHI nodes in common successors. -static bool -isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, - Instruction *Cond, - SmallVectorImpl<PHINode *> &PhiNodes) { - if (SI1 == SI2) - return false; // Can't merge with self! 
- assert(SI1->isUnconditional() && SI2->isConditional()); - - // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: - // 1> We have a constant incoming value for the conditional branch; - // 2> We have "Cond" as the incoming value for the unconditional branch; - // 3> SI2->getCondition() and Cond have same operands. - CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition()); - if (!Ci2) - return false; - if (!(Cond->getOperand(0) == Ci2->getOperand(0) && - Cond->getOperand(1) == Ci2->getOperand(1)) && - !(Cond->getOperand(0) == Ci2->getOperand(1) && - Cond->getOperand(1) == Ci2->getOperand(0))) - return false; - - BasicBlock *SI1BB = SI1->getParent(); - BasicBlock *SI2BB = SI2->getParent(); - SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - for (BasicBlock *Succ : successors(SI2BB)) - if (SI1Succs.count(Succ)) - for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - if (PN->getIncomingValueForBlock(SI1BB) != Cond || - !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) - return false; - PhiNodes.push_back(PN); - } - return true; -} - -/// Update PHI nodes in Succ to indicate that there will now be entries in it -/// from the 'NewPred' block. The values that will be flowing into the PHI nodes -/// will be the same as those coming in from ExistPred, an existing predecessor -/// of Succ. -static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, - BasicBlock *ExistPred, - MemorySSAUpdater *MSSAU = nullptr) { - for (PHINode &PN : Succ->phis()) - PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred); - if (MSSAU) - if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ)) - MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred); -} - -/// Compute an abstract "cost" of speculating the given instruction, -/// which is assumed to be safe to speculate. TCC_Free means cheap, -/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively -/// expensive. -static unsigned ComputeSpeculationCost(const User *I, - const TargetTransformInfo &TTI) { - assert(isSafeToSpeculativelyExecute(I) && - "Instruction is not safe to speculatively execute!"); - return TTI.getUserCost(I); -} - -/// If we have a merge point of an "if condition" as accepted above, -/// return true if the specified value dominates the block. We -/// don't handle the true generality of domination here, just a special case -/// which works well enough for us. -/// -/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to -/// see if V (which must be an instruction) and its recursive operands -/// that do not dominate BB have a combined cost lower than CostRemaining and -/// are non-trapping. If both are true, the instruction is inserted into the -/// set and true is returned. -/// -/// The cost for most non-trapping instructions is defined as 1 except for -/// Select whose cost is 2. -/// -/// After this function returns, CostRemaining is decreased by the cost of -/// V plus its non-dominating operands. If that cost is greater than -/// CostRemaining, false is returned and CostRemaining is undefined. -static bool DominatesMergePoint(Value *V, BasicBlock *BB, - SmallPtrSetImpl<Instruction *> &AggressiveInsts, - unsigned &CostRemaining, - const TargetTransformInfo &TTI, - unsigned Depth = 0) { - // It is possible to hit a zero-cost cycle (phi/gep instructions for example), - // so limit the recursion depth. 
- // TODO: While this recursion limit does prevent pathological behavior, it - // would be better to track visited instructions to avoid cycles. - if (Depth == MaxSpeculationDepth) - return false; - - Instruction *I = dyn_cast<Instruction>(V); - if (!I) { - // Non-instructions all dominate instructions, but not all constantexprs - // can be executed unconditionally. - if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) - if (C->canTrap()) - return false; - return true; - } - BasicBlock *PBB = I->getParent(); - - // We don't want to allow weird loops that might have the "if condition" in - // the bottom of this block. - if (PBB == BB) - return false; - - // If this instruction is defined in a block that contains an unconditional - // branch to BB, then it must be in the 'conditional' part of the "if - // statement". If not, it definitely dominates the region. - BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); - if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) - return true; - - // If we have seen this instruction before, don't count it again. - if (AggressiveInsts.count(I)) - return true; - - // Okay, it looks like the instruction IS in the "condition". Check to - // see if it's a cheap instruction to unconditionally compute, and if it - // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I)) - return false; - - unsigned Cost = ComputeSpeculationCost(I, TTI); - - // Allow exactly one instruction to be speculated regardless of its cost - // (as long as it is safe to do so). - // This is intended to flatten the CFG even if the instruction is a division - // or other expensive operation. The speculation of an expensive instruction - // is expected to be undone in CodeGenPrepare if the speculation has not - // enabled further IR optimizations. - if (Cost > CostRemaining && - (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) - return false; - - // Avoid unsigned wrap. - CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; - - // Okay, we can only really hoist these out if their operands do - // not take us over the cost threshold. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, - Depth + 1)) - return false; - // Okay, it's safe to do this! Remember this instruction. - AggressiveInsts.insert(I); - return true; -} - -/// Extract ConstantInt from value, looking through IntToPtr -/// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { - // Normal constant int. - ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy()) - return CI; - - // This is some kind of pointer constant. Turn it into a pointer-sized - // ConstantInt if possible. - IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); - - // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). - if (isa<ConstantPointerNull>(V)) - return ConstantInt::get(PtrTy, 0); - - // IntToPtr const int. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { - // The constant is very likely to have the right type already. 
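- // e.g. on a 64-bit target, inttoptr (i64 7 to i8*) yields the i64 7
- // unchanged here; only a mismatched width needs the integer cast below.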
- if (CI->getType() == PtrTy) - return CI; - else - return cast<ConstantInt>( - ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); - } - return nullptr; -} - -namespace { - -/// Given a chain of or (||) or and (&&) comparison of a value against a -/// constant, this will try to recover the information required for a switch -/// structure. -/// It will depth-first traverse the chain of comparison, seeking for patterns -/// like %a == 12 or %a < 4 and combine them to produce a set of integer -/// representing the different cases for the switch. -/// Note that if the chain is composed of '||' it will build the set of elements -/// that matches the comparisons (i.e. any of this value validate the chain) -/// while for a chain of '&&' it will build the set elements that make the test -/// fail. -struct ConstantComparesGatherer { - const DataLayout &DL; - - /// Value found for the switch comparison - Value *CompValue = nullptr; - - /// Extra clause to be checked before the switch - Value *Extra = nullptr; - - /// Set of integers to match in switch - SmallVector<ConstantInt *, 8> Vals; - - /// Number of comparisons matched in the and/or chain - unsigned UsedICmps = 0; - - /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { - gather(Cond); - } - - ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; - ConstantComparesGatherer & - operator=(const ConstantComparesGatherer &) = delete; - -private: - /// Try to set the current value used for the comparison, it succeeds only if - /// it wasn't set before or if the new value is the same as the old one - bool setValueOnce(Value *NewVal) { - if (CompValue && CompValue != NewVal) - return false; - CompValue = NewVal; - return (CompValue != nullptr); - } - - /// Try to match Instruction "I" as a comparison against a constant and - /// populates the array Vals with the set of values that match (or do not - /// match depending on isEQ). - /// Return false on failure. On success, the Value the comparison matched - /// against is placed in CompValue. - /// If CompValue is already set, the function is expected to fail if a match - /// is found but the value compared to is different. - bool matchInstruction(Instruction *I, bool isEQ) { - // If this is an icmp against a constant, handle this as one of the cases. - ICmpInst *ICI; - ConstantInt *C; - if (!((ICI = dyn_cast<ICmpInst>(I)) && - (C = GetConstantInt(I->getOperand(1), DL)))) { - return false; - } - - Value *RHSVal; - const APInt *RHSC; - - // Pattern match a special case - // (x & ~2^z) == y --> x == y || x == y|2^z - // This undoes a transformation done by instcombine to fuse 2 compares. - if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { - // It's a little bit hard to see why the following transformations are - // correct. Here is a CVC3 program to verify them for 64-bit values: - - /* - ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); - x : BITVECTOR(64); - y : BITVECTOR(64); - z : BITVECTOR(64); - mask : BITVECTOR(64) = BVSHL(ONE, z); - QUERY( (y & ~mask = y) => - ((x & ~mask = y) <=> (x = y OR x = (y | mask))) - ); - QUERY( (y | mask = y) => - ((x | mask = y) <=> (x = y OR x = (y & ~mask))) - ); - */ - - // Please note that each pattern must be a dual implication (<--> or - // iff). One directional implication can create spurious matches. 
If the
-      // implication is only one-way, an unsatisfiable condition on the left
-      // side can imply a satisfiable condition on the right side. Dual
-      // implication ensures that satisfiable conditions are transformed to
-      // other satisfiable conditions and unsatisfiable conditions are
-      // transformed to other unsatisfiable conditions.
-
-      // Here is a concrete example of an unsatisfiable condition on the left
-      // implying a satisfiable condition on the right:
-      //
-      //   mask = (1 << z)
-      //   (x & ~mask) == y  --> (x == y || x == (y | mask))
-      //
-      // Substituting y = 3, z = 0 yields:
-      //   (x & -2) == 3 --> (x == 3 || x == 2)
-
-      // Pattern match a special case:
-      /*
-        QUERY( (y & ~mask = y) =>
-               ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
-        );
-      */
-      if (match(ICI->getOperand(0),
-                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
-        APInt Mask = ~*RHSC;
-        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
-          // If we already have a value for the switch, it has to match!
-          if (!setValueOnce(RHSVal))
-            return false;
-
-          Vals.push_back(C);
-          Vals.push_back(ConstantInt::get(C->getContext(),
-                                          C->getValue() | Mask));
-          UsedICmps++;
-          return true;
-        }
-      }
-
-      // Pattern match a special case:
-      /*
-        QUERY( (y | mask = y) =>
-               ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
-        );
-      */
-      if (match(ICI->getOperand(0),
-                m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
-        APInt Mask = *RHSC;
-        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
-          // If we already have a value for the switch, it has to match!
-          if (!setValueOnce(RHSVal))
-            return false;
-
-          Vals.push_back(C);
-          Vals.push_back(ConstantInt::get(C->getContext(),
-                                          C->getValue() & ~Mask));
-          UsedICmps++;
-          return true;
-        }
-      }
-
-      // If we already have a value for the switch, it has to match!
-      if (!setValueOnce(ICI->getOperand(0)))
-        return false;
-
-      UsedICmps++;
-      Vals.push_back(C);
-      return true;
-    }
-
-    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
-    ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
-        ICI->getPredicate(), C->getValue());
-
-    // Shift the range if the compare is fed by an add. This is the range
-    // compare idiom as emitted by instcombine.
-    Value *CandidateVal = I->getOperand(0);
-    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
-      Span = Span.subtract(*RHSC);
-      CandidateVal = RHSVal;
-    }
-
-    // If this is an and/!= check, then we are looking to build the set of
-    // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
-    // x != 0 && x != 1.
-    if (!isEQ)
-      Span = Span.inverse();
-
-    // If there are a ton of values, we don't want to make a ginormous switch.
-    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
-      return false;
-    }
-
-    // If we already have a value for the switch, it has to match!
-    if (!setValueOnce(CandidateVal))
-      return false;
-
-    // Add all values from the range to the set
-    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
-      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
-
-    UsedICmps++;
-    return true;
-  }
-
-  /// Given a potentially 'or'd or 'and'd together collection of icmp
-  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
-  /// the value being compared, and stick the list of constants into the Vals
-  /// vector.
-  /// One "Extra" case is allowed to differ from the others.
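
The two CVC3 queries embedded in the comments above can also be checked exhaustively at a small bit width. The following standalone C++ sketch is an editorial illustration, not part of the deleted file; it verifies both dual implications over all 8-bit values under the same side conditions the matcher enforces (the mask is a single bit, and the constant already satisfies the masked form):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned z = 0; z < 8; ++z) {
        const uint8_t Mask = uint8_t(1u << z);
        for (unsigned Y = 0; Y < 256; ++Y) {
          for (unsigned X = 0; X < 256; ++X) {
            const uint8_t x = uint8_t(X), y = uint8_t(Y);
            // QUERY 1: (y & ~mask) == y implies
            //   (x & ~mask) == y  <=>  (x == y || x == (y | mask)).
            if (uint8_t(y & ~Mask) == y)
              assert((uint8_t(x & ~Mask) == y) ==
                     (x == y || x == uint8_t(y | Mask)));
            // QUERY 2: (y | mask) == y implies
            //   (x | mask) == y  <=>  (x == y || x == (y & ~mask)).
            if (uint8_t(y | Mask) == y)
              assert((uint8_t(x | Mask) == y) ==
                     (x == y || x == uint8_t(y & ~Mask)));
          }
        }
      }
      return 0; // Both implications hold for every mask bit and value pair.
    }
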
-  void gather(Value *V) {
-    // Callers always pass an Instruction, so cast<> rather than dereferencing
-    // an unchecked dyn_cast<> result.
-    bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
-
-    // Keep a stack (SmallVector for efficiency) for depth-first traversal
-    SmallVector<Value *, 8> DFT;
-    SmallPtrSet<Value *, 8> Visited;
-
-    // Initialize
-    Visited.insert(V);
-    DFT.push_back(V);
-
-    while (!DFT.empty()) {
-      V = DFT.pop_back_val();
-
-      if (Instruction *I = dyn_cast<Instruction>(V)) {
-        // If it is a || (or && depending on isEQ), process the operands.
-        if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
-          if (Visited.insert(I->getOperand(1)).second)
-            DFT.push_back(I->getOperand(1));
-          if (Visited.insert(I->getOperand(0)).second)
-            DFT.push_back(I->getOperand(0));
-          continue;
-        }
-
-        // Try to match the current instruction
-        if (matchInstruction(I, isEQ))
-          // Match succeeded, continue the loop
-          continue;
-      }
-
-      // One element of the sequence of || (or &&) could not be matched as a
-      // comparison against the same value as the others.
-      // We allow only one "Extra" case to be checked before the switch
-      if (!Extra) {
-        Extra = V;
-        continue;
-      }
-      // Failed to parse a proper sequence, abort now
-      CompValue = nullptr;
-      break;
-    }
-  }
-};
-
-} // end anonymous namespace
-
-static void EraseTerminatorAndDCECond(Instruction *TI,
-                                      MemorySSAUpdater *MSSAU = nullptr) {
-  Instruction *Cond = nullptr;
-  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-    Cond = dyn_cast<Instruction>(SI->getCondition());
-  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-    if (BI->isConditional())
-      Cond = dyn_cast<Instruction>(BI->getCondition());
-  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
-    Cond = dyn_cast<Instruction>(IBI->getAddress());
-  }
-
-  TI->eraseFromParent();
-  if (Cond)
-    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
-}
-
-/// Return the value being compared if the specified terminator is a
-/// comparison of a value against a constant integer, or null otherwise.
-Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
-  Value *CV = nullptr;
-  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-    // Do not permit merging of large switch instructions into their
-    // predecessors unless there is only one predecessor.
-    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
-      CV = SI->getCondition();
-  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
-    if (BI->isConditional() && BI->getCondition()->hasOneUse())
-      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
-        if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
-          CV = ICI->getOperand(0);
-      }
-
-  // Unwrap any lossless ptrtoint cast.
-  if (CV) {
-    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
-      Value *Ptr = PTII->getPointerOperand();
-      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
-        CV = Ptr;
-    }
-  }
-  return CV;
-}
-
-/// Given a value comparison instruction,
-/// decode all of the 'cases' that it represents and return the 'default' block.
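
As a quick illustration of the eq/ne successor bookkeeping this decoding relies on: for `br (icmp eq X, C), S0, S1` the matching edge is S0 and the default is S1, while `icmp ne` flips both roles. A minimal editorial sketch with hypothetical stand-in types (not the pass's API):

    #include <cassert>

    // How a two-way equality branch decodes into one switch-style case plus
    // a default (illustrative model only).
    struct DecodedBranch {
      int CaseValue;   // constant compared against
      int CaseSucc;    // successor taken when the value matches
      int DefaultSucc; // successor taken otherwise
    };

    // For "br (icmp PRED X, C), Succ0, Succ1":
    //  - EQ: a match takes Succ0, the default is Succ1;
    //  - NE: a match takes Succ1, the default is Succ0.
    static DecodedBranch decode(bool IsNE, int C, int Succ0, int Succ1) {
      return {C, IsNE ? Succ1 : Succ0, IsNE ? Succ0 : Succ1};
    }

    int main() {
      DecodedBranch EQ = decode(false, 7, /*Succ0=*/100, /*Succ1=*/200);
      assert(EQ.CaseValue == 7 && EQ.CaseSucc == 100 && EQ.DefaultSucc == 200);
      DecodedBranch NE = decode(true, 7, 100, 200);
      assert(NE.CaseSucc == 200 && NE.DefaultSucc == 100);
      return 0;
    }
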
-BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( - Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) { - if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - Cases.reserve(SI->getNumCases()); - for (auto Case : SI->cases()) - Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(), - Case.getCaseSuccessor())); - return SI->getDefaultDest(); - } - - BranchInst *BI = cast<BranchInst>(TI); - ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); - Cases.push_back(ValueEqualityComparisonCase( - GetConstantInt(ICI->getOperand(1), DL), Succ)); - return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); -} - -/// Given a vector of bb/value pairs, remove any entries -/// in the list that match the specified block. -static void -EliminateBlockCases(BasicBlock *BB, - std::vector<ValueEqualityComparisonCase> &Cases) { - Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); -} - -/// Return true if there are any keys in C1 that exist in C2 as well. -static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, - std::vector<ValueEqualityComparisonCase> &C2) { - std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; - - // Make V1 be smaller than V2. - if (V1->size() > V2->size()) - std::swap(V1, V2); - - if (V1->empty()) - return false; - if (V1->size() == 1) { - // Just scan V2. - ConstantInt *TheVal = (*V1)[0].Value; - for (unsigned i = 0, e = V2->size(); i != e; ++i) - if (TheVal == (*V2)[i].Value) - return true; - } - - // Otherwise, just sort both lists and compare element by element. - array_pod_sort(V1->begin(), V1->end()); - array_pod_sort(V2->begin(), V2->end()); - unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); - while (i1 != e1 && i2 != e2) { - if ((*V1)[i1].Value == (*V2)[i2].Value) - return true; - if ((*V1)[i1].Value < (*V2)[i2].Value) - ++i1; - else - ++i2; - } - return false; -} - -// Set branch weights on SwitchInst. This sets the metadata if there is at -// least one non-zero weight. -static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) { - // Check that there is at least one non-zero weight. Otherwise, pass - // nullptr to setMetadata which will erase the existing metadata. - MDNode *N = nullptr; - if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) - N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); - SI->setMetadata(LLVMContext::MD_prof, N); -} - -// Similar to the above, but for branch and select instructions that take -// exactly 2 weights. -static void setBranchWeights(Instruction *I, uint32_t TrueWeight, - uint32_t FalseWeight) { - assert(isa<BranchInst>(I) || isa<SelectInst>(I)); - // Check that there is at least one non-zero weight. Otherwise, pass - // nullptr to setMetadata which will erase the existing metadata. - MDNode *N = nullptr; - if (TrueWeight || FalseWeight) - N = MDBuilder(I->getParent()->getContext()) - .createBranchWeights(TrueWeight, FalseWeight); - I->setMetadata(LLVMContext::MD_prof, N); -} - -/// If TI is known to be a terminator instruction and its block is known to -/// only have a single predecessor block, check to see if that predecessor is -/// also a value comparison with the same value, and if that comparison -/// determines the outcome of this comparison. If so, simplify TI. This does a -/// very limited form of jump threading. 
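
The default-edge half of this limited jump threading reduces to set reasoning: arriving via the predecessor's default edge proves the compared value is none of the predecessor's case constants, so any of the current block's cases drawn from that set is statically dead. A plain C++ sketch of that pruning (editorial illustration with invented values):

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      // Cases the predecessor's switch on X already routed to other blocks.
      // Reaching the current block via the predecessor's *default* edge
      // means X can be none of these values.
      std::set<int> PredCases = {1, 2, 3};

      // Cases tested again by the current block's switch on the same X.
      std::vector<int> ThisCases = {2, 3, 7};

      // Mirrors the DeadCases pruning above: any case the predecessor
      // already excluded is dead here and can be removed.
      for (int C : ThisCases)
        if (PredCases.count(C))
          std::printf("case %d is dead, prune it\n", C);
        else
          std::printf("case %d survives\n", C);
      return 0; // Prints: 2 dead, 3 dead, 7 survives.
    }
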
-bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( - Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { - Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); - if (!PredVal) - return false; // Not a value comparison in predecessor. - - Value *ThisVal = isValueEqualityComparison(TI); - assert(ThisVal && "This isn't a value comparison!!"); - if (ThisVal != PredVal) - return false; // Different predicates. - - // TODO: Preserve branch weight metadata, similarly to how - // FoldValueComparisonIntoPredecessors preserves it. - - // Find out information about when control will move from Pred to TI's block. - std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDef = - GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); - EliminateBlockCases(PredDef, PredCases); // Remove default from cases. - - // Find information about how control leaves this block. - std::vector<ValueEqualityComparisonCase> ThisCases; - BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); - EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. - - // If TI's block is the default block from Pred's comparison, potentially - // simplify TI based on this knowledge. - if (PredDef == TI->getParent()) { - // If we are here, we know that the value is none of those cases listed in - // PredCases. If there are any cases in ThisCases that are in PredCases, we - // can simplify TI. - if (!ValuesOverlap(PredCases, ThisCases)) - return false; - - if (isa<BranchInst>(TI)) { - // Okay, one of the successors of this condbr is dead. Convert it to a - // uncond br. - assert(ThisCases.size() == 1 && "Branch can only have one case!"); - // Insert the new branch. - Instruction *NI = Builder.CreateBr(ThisDef); - (void)NI; - - // Remove PHI node entries for the dead edge. - ThisCases[0].Dest->removePredecessor(TI->getParent()); - - LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI - << "\n"); - - EraseTerminatorAndDCECond(TI); - return true; - } - - SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI); - // Okay, TI has cases that are statically dead, prune them away. - SmallPtrSet<Constant *, 16> DeadCases; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - DeadCases.insert(PredCases[i].Value); - - LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI); - - for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { - --i; - if (DeadCases.count(i->getCaseValue())) { - i->getCaseSuccessor()->removePredecessor(TI->getParent()); - SI.removeCase(i); - } - } - LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); - return true; - } - - // Otherwise, TI's block must correspond to some matched value. Find out - // which value (or set of values) this is. - ConstantInt *TIV = nullptr; - BasicBlock *TIBB = TI->getParent(); - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest == TIBB) { - if (TIV) - return false; // Cannot handle multiple values coming to this block. - TIV = PredCases[i].Value; - } - assert(TIV && "No edge from pred to succ?"); - - // Okay, we found the one constant that our value can be if we get into TI's - // BB. Find out which successor will unconditionally be branched to. 
-  BasicBlock *TheRealDest = nullptr;
-  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
-    if (ThisCases[i].Value == TIV) {
-      TheRealDest = ThisCases[i].Dest;
-      break;
-    }
-
-  // If not handled by any explicit cases, it is handled by the default case.
-  if (!TheRealDest)
-    TheRealDest = ThisDef;
-
-  // Remove PHI node entries for dead edges.
-  BasicBlock *CheckEdge = TheRealDest;
-  for (BasicBlock *Succ : successors(TIBB))
-    if (Succ != CheckEdge)
-      Succ->removePredecessor(TIBB);
-    else
-      CheckEdge = nullptr;
-
-  // Insert the new branch.
-  Instruction *NI = Builder.CreateBr(TheRealDest);
-  (void)NI;
-
-  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
-                    << "Through successor TI: " << *TI << "Leaving: " << *NI
-                    << "\n");
-
-  EraseTerminatorAndDCECond(TI);
-  return true;
-}
-
-namespace {
-
-/// This class implements a stable ordering of constant
-/// integers that does not depend on their address. This is important for
-/// applications that sort ConstantInt's to ensure uniqueness.
-struct ConstantIntOrdering {
-  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
-    return LHS->getValue().ult(RHS->getValue());
-  }
-};
-
-} // end anonymous namespace
-
-static int ConstantIntSortPredicate(ConstantInt *const *P1,
-                                    ConstantInt *const *P2) {
-  const ConstantInt *LHS = *P1;
-  const ConstantInt *RHS = *P2;
-  if (LHS == RHS)
-    return 0;
-  return LHS->getValue().ult(RHS->getValue()) ? -1 : 1;
-}
-
-static inline bool HasBranchWeights(const Instruction *I) {
-  MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
-  if (ProfMD && ProfMD->getOperand(0))
-    if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
-      return MDS->getString().equals("branch_weights");
-
-  return false;
-}
-
-/// Get the weights of a given terminator; the default weight is at the front
-/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
-/// metadata.
-static void GetBranchWeights(Instruction *TI,
-                             SmallVectorImpl<uint64_t> &Weights) {
-  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
-  assert(MD);
-  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
-    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
-    Weights.push_back(CI->getValue().getZExtValue());
-  }
-
-  // If TI is a conditional eq, the default case is the false case,
-  // and the corresponding branch-weight data is at index 2. We swap the
-  // default weight to be the first entry.
-  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-    assert(Weights.size() == 2);
-    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
-    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
-      std::swap(Weights.front(), Weights.back());
-  }
-}
-
-/// Right-shift all weights by the same amount until every one fits in
-/// uint32_t.
-static void FitWeights(MutableArrayRef<uint64_t> Weights) {
-  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
-  if (Max > UINT_MAX) {
-    unsigned Offset = 32 - countLeadingZeros(Max);
-    for (uint64_t &I : Weights)
-      I >>= Offset;
-  }
-}
-
-/// The specified terminator is a value equality comparison instruction
-/// (either a switch or a branch on "X == c").
-/// See if any of the predecessors of the terminator block are value comparisons
-/// on the same value. If so, and if safe to do so, fold them together.
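
FitWeights above rescales by one right shift so the largest 64-bit product fits back into uint32_t, preserving the relative proportions up to truncation of the low bits. A self-contained sketch of the same policy (hypothetical helper name; a simple loop stands in for countLeadingZeros):

    #include <algorithm>
    #include <climits>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Shift all weights right by the same amount until the maximum fits in
    // 32 bits; proportions are preserved up to truncation.
    static void fitWeights(std::vector<uint64_t> &Weights) {
      uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
      unsigned Shift = 0;
      while ((Max >> Shift) > UINT_MAX)
        ++Shift;
      for (uint64_t &W : Weights)
        W >>= Shift;
    }

    int main() {
      // Products of two 32-bit profile weights may need up to 64 bits.
      std::vector<uint64_t> W = {uint64_t(3) << 40, uint64_t(1) << 33, 5};
      fitWeights(W);
      for (uint64_t V : W) // Tiny weights can truncate to 0; that's accepted.
        std::printf("%llu\n", (unsigned long long)V);
      return 0;
    }
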
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, - IRBuilder<> &Builder) { - BasicBlock *BB = TI->getParent(); - Value *CV = isValueEqualityComparison(TI); // CondVal - assert(CV && "Not a comparison?"); - bool Changed = false; - - SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.pop_back_val(); - - // See if the predecessor is a comparison with the same value. - Instruction *PTI = Pred->getTerminator(); - Value *PCV = isValueEqualityComparison(PTI); // PredCondVal - - if (PCV == CV && TI != PTI) { - SmallSetVector<BasicBlock*, 4> FailBlocks; - if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { - for (auto *Succ : FailBlocks) { - if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split")) - return false; - } - } - - // Figure out which 'cases' to copy from SI to PSI. - std::vector<ValueEqualityComparisonCase> BBCases; - BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - - std::vector<ValueEqualityComparisonCase> PredCases; - BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); - - // Based on whether the default edge from PTI goes to BB or not, fill in - // PredCases and PredDefault with the new switch cases we would like to - // build. - SmallVector<BasicBlock *, 8> NewSuccessors; - - // Update the branch weight metadata along the way - SmallVector<uint64_t, 8> Weights; - bool PredHasWeights = HasBranchWeights(PTI); - bool SuccHasWeights = HasBranchWeights(TI); - - if (PredHasWeights) { - GetBranchWeights(PTI, Weights); - // branch-weight metadata is inconsistent here. - if (Weights.size() != 1 + PredCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (SuccHasWeights) - // If there are no predecessor weights but there are successor weights, - // populate Weights with 1, which will later be scaled to the sum of - // successor's weights - Weights.assign(1 + PredCases.size(), 1); - - SmallVector<uint64_t, 8> SuccWeights; - if (SuccHasWeights) { - GetBranchWeights(TI, SuccWeights); - // branch-weight metadata is inconsistent here. - if (SuccWeights.size() != 1 + BBCases.size()) - PredHasWeights = SuccHasWeights = false; - } else if (PredHasWeights) - SuccWeights.assign(1 + BBCases.size(), 1); - - if (PredDefault == BB) { - // If this is the default destination from PTI, only the edges in TI - // that don't occur in PTI, or that branch to BB will be activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest != BB) - PTIHandled.insert(PredCases[i].Value); - else { - // The default destination is BB, we don't need explicit targets. - std::swap(PredCases[i], PredCases.back()); - - if (PredHasWeights || SuccHasWeights) { - // Increase weight for the default case. - Weights[0] += Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } - - PredCases.pop_back(); - --i; - --e; - } - - // Reconstruct the new switch statement we will be building. 
- if (PredDefault != BBDefault) { - PredDefault->removePredecessor(Pred); - PredDefault = BBDefault; - NewSuccessors.push_back(BBDefault); - } - - unsigned CasesFromPred = Weights.size(); - uint64_t ValidTotalSuccWeight = 0; - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].Value) && - BBCases[i].Dest != BBDefault) { - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - if (SuccHasWeights || PredHasWeights) { - // The default weight is at index 0, so weight for the ith case - // should be at index i+1. Scale the cases from successor by - // PredDefaultWeight (Weights[0]). - Weights.push_back(Weights[0] * SuccWeights[i + 1]); - ValidTotalSuccWeight += SuccWeights[i + 1]; - } - } - - if (SuccHasWeights || PredHasWeights) { - ValidTotalSuccWeight += SuccWeights[0]; - // Scale the cases from predecessor by ValidTotalSuccWeight. - for (unsigned i = 1; i < CasesFromPred; ++i) - Weights[i] *= ValidTotalSuccWeight; - // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). - Weights[0] *= SuccWeights[0]; - } - } else { - // If this is not the default destination from PSI, only the edges - // in SI that occur in PSI with a destination of BB will be - // activated. - std::set<ConstantInt *, ConstantIntOrdering> PTIHandled; - std::map<ConstantInt *, uint64_t> WeightsForHandled; - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].Dest == BB) { - PTIHandled.insert(PredCases[i].Value); - - if (PredHasWeights || SuccHasWeights) { - WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; - std::swap(Weights[i + 1], Weights.back()); - Weights.pop_back(); - } - - std::swap(PredCases[i], PredCases.back()); - PredCases.pop_back(); - --i; - --e; - } - - // Okay, now we know which constants were sent to BB from the - // predecessor. Figure out where they will all go now. - for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].Value)) { - // If this is one we are capable of getting... - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[BBCases[i].Value]); - PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].Dest); - PTIHandled.erase( - BBCases[i].Value); // This constant is taken care of - } - - // If there are any constants vectored to BB that TI doesn't handle, - // they must go to the default destination of TI. - for (ConstantInt *I : PTIHandled) { - if (PredHasWeights || SuccHasWeights) - Weights.push_back(WeightsForHandled[I]); - PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); - NewSuccessors.push_back(BBDefault); - } - } - - // Okay, at this point, we know which new successor Pred will get. Make - // sure we update the number of entries in the PHI nodes for these - // successors. - for (BasicBlock *NewSuccessor : NewSuccessors) - AddPredecessorToBlock(NewSuccessor, Pred, BB); - - Builder.SetInsertPoint(PTI); - // Convert pointer to int before we switch. - if (CV->getType()->isPointerTy()) { - CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), - "magicptr"); - } - - // Now that the successors are updated, create the new Switch instruction. 
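
The scaling in the branch above keeps both weight sets over one common denominator: successor cases reached through the shared default edge are weighted by the predecessor's default weight, while weights staying on the predecessor side are multiplied by the total successor weight. A worked numeric sketch (editorial illustration, invented profile numbers):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Predecessor switch: default (-> BB) weight 8, one case weight 2.
      uint64_t PredDefault = 8, PredCase = 2;
      // Successor switch in BB: default weight 3, one case weight 5.
      uint64_t SuccDefault = 3, SuccCase = 5;

      // Successor cases are only reachable via the predecessor's default
      // edge, so their merged weight is PredDefault * SuccCase.
      uint64_t MergedSuccCase = PredDefault * SuccCase; // 40

      // Everything staying on the predecessor side is scaled by the total
      // successor weight so both sides share one denominator.
      uint64_t TotalSucc = SuccDefault + SuccCase;        // 8
      uint64_t MergedPredCase = PredCase * TotalSucc;     // 16
      uint64_t MergedDefault = PredDefault * SuccDefault; // 24

      // Sanity: 8/10 of traffic reached BB and 5/8 of that took the
      // successor case, so 40/80 == (8/10) * (5/8), as expected.
      std::printf("case-from-succ=%llu pred-case=%llu default=%llu\n",
                  (unsigned long long)MergedSuccCase,
                  (unsigned long long)MergedPredCase,
                  (unsigned long long)MergedDefault);
      return 0;
    }
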
- SwitchInst *NewSI = - Builder.CreateSwitch(CV, PredDefault, PredCases.size()); - NewSI->setDebugLoc(PTI->getDebugLoc()); - for (ValueEqualityComparisonCase &V : PredCases) - NewSI->addCase(V.Value, V.Dest); - - if (PredHasWeights || SuccHasWeights) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(Weights); - - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - - setBranchWeights(NewSI, MDWeights); - } - - EraseTerminatorAndDCECond(PTI); - - // Okay, last check. If BB is still a successor of PSI, then we must - // have an infinite loop case. If so, add an infinitely looping block - // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = nullptr; - for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) - if (NewSI->getSuccessor(i) == BB) { - if (!InfLoopBlock) { - // Insert it at the end of the function, because it's either code, - // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", - BB->getParent()); - BranchInst::Create(InfLoopBlock, InfLoopBlock); - } - NewSI->setSuccessor(i, InfLoopBlock); - } - - Changed = true; - } - } - return Changed; -} - -// If we would need to insert a select that uses the value of this invoke -// (comments in HoistThenElseCodeToIf explain why we would need to do this), we -// can't hoist the invoke, as there is nowhere to put the select in this case. -static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, - Instruction *I1, Instruction *I2) { - for (BasicBlock *Succ : successors(BB1)) { - for (const PHINode &PN : Succ->phis()) { - Value *BB1V = PN.getIncomingValueForBlock(BB1); - Value *BB2V = PN.getIncomingValueForBlock(BB2); - if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { - return false; - } - } - } - return true; -} - -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); - -/// Given a conditional branch that goes to BB1 and BB2, hoist any common code -/// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, - const TargetTransformInfo &TTI) { - // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. In particular, we don't want to get into - // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As - // such, we currently just scan for obviously identical instructions in an - // identical order. - BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. - BasicBlock *BB2 = BI->getSuccessor(1); // The false destination - - BasicBlock::iterator BB1_Itr = BB1->begin(); - BasicBlock::iterator BB2_Itr = BB2->begin(); - - Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; - } - // FIXME: Can we define a safety predicate for CallBr? 
- if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || - (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) || - isa<CallBrInst>(I1)) - return false; - - BasicBlock *BIParent = BI->getParent(); - - bool Changed = false; - do { - // If we are hoisting the terminator instruction, don't move one (making a - // broken BB), instead clone it, and remove BI. - if (I1->isTerminator()) - goto HoistTerminator; - - // If we're going to hoist a call, make sure that the two instructions we're - // commoning/hoisting are both marked with musttail, or neither of them is - // marked as such. Otherwise, we might end up in a situation where we hoist - // from a block where the terminator is a `ret` to a block where the terminator - // is a `br`, and `musttail` calls expect to be followed by a return. - auto *C1 = dyn_cast<CallInst>(I1); - auto *C2 = dyn_cast<CallInst>(I2); - if (C1 && C2) - if (C1->isMustTailCall() != C2->isMustTailCall()) - return Changed; - - if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) - return Changed; - - if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { - assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2)); - // The debug location is an integral part of a debug info intrinsic - // and can't be separated from it or replaced. Instead of attempting - // to merge locations, simply hoist both copies of the intrinsic. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - BIParent->getInstList().splice(BI->getIterator(), - BB2->getInstList(), I2); - Changed = true; - } else { - // For a normal instruction, we just move one to right before the branch, - // then replace all uses of the other with the first. Finally, we remove - // the now redundant second instruction. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, - LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group}; - combineMetadata(I1, I2, KnownIDs, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); - Changed = true; - } - - I1 = &*BB1_Itr++; - I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa<DbgInfoIntrinsic>(I1)) - I1 = &*BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = &*BB2_Itr++; - } - } while (I1->isIdenticalToWhenDefined(I2)); - - return true; - -HoistTerminator: - // It may not be possible to hoist an invoke. - // FIXME: Can we define a safety predicate for CallBr? - if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return Changed; - - // TODO: callbr hoisting currently disabled pending further study. 
-  if (isa<CallBrInst>(I1))
-    return Changed;
-
-  for (BasicBlock *Succ : successors(BB1)) {
-    for (PHINode &PN : Succ->phis()) {
-      Value *BB1V = PN.getIncomingValueForBlock(BB1);
-      Value *BB2V = PN.getIncomingValueForBlock(BB2);
-      if (BB1V == BB2V)
-        continue;
-
-      // Check for passingValueIsAlwaysUndefined here because we would rather
-      // eliminate undefined control flow than convert it to a select.
-      if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
-          passingValueIsAlwaysUndefined(BB2V, &PN))
-        return Changed;
-
-      if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
-        return Changed;
-      if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
-        return Changed;
-    }
-  }
-
-  // Okay, it is safe to hoist the terminator.
-  Instruction *NT = I1->clone();
-  BIParent->getInstList().insert(BI->getIterator(), NT);
-  if (!NT->getType()->isVoidTy()) {
-    I1->replaceAllUsesWith(NT);
-    I2->replaceAllUsesWith(NT);
-    NT->takeName(I1);
-  }
-
-  // Ensure terminator gets a debug location, even an unknown one, in case
-  // it involves inlinable calls.
-  NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
-
-  // PHIs created below will adopt NT's merged DebugLoc.
-  IRBuilder<NoFolder> Builder(NT);
-
-  // Hoisting one of the terminators from our successor is a great thing.
-  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
-  // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
-  // nodes, so we insert a select instruction to compute the final result.
-  std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
-  for (BasicBlock *Succ : successors(BB1)) {
-    for (PHINode &PN : Succ->phis()) {
-      Value *BB1V = PN.getIncomingValueForBlock(BB1);
-      Value *BB2V = PN.getIncomingValueForBlock(BB2);
-      if (BB1V == BB2V)
-        continue;
-
-      // These values do not agree. Insert a select instruction before NT
-      // that determines the right value.
-      SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
-      if (!SI)
-        SI = cast<SelectInst>(
-            Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
-                                 BB1V->getName() + "." + BB2V->getName(), BI));
-
-      // Make the PHI node use the select for all incoming values for BB1/BB2
-      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
-        if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
-          PN.setIncomingValue(i, SI);
-    }
-  }
-
-  // Update any PHI nodes in our new successors.
-  for (BasicBlock *Succ : successors(BB1))
-    AddPredecessorToBlock(Succ, BIParent, BB1);
-
-  EraseTerminatorAndDCECond(BI);
-  return true;
-}
-
-// All instructions in Insts belong to different blocks that all unconditionally
-// branch to a common successor. Analyze each instruction and return true if it
-// would be possible to sink them into their successor, creating one common
-// instruction instead. For every value that would be required to be provided by
-// a PHI node (because an operand varies in each input block), add to
-// PHIOperands.
-static bool canSinkInstructions(
-    ArrayRef<Instruction *> Insts,
-    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
-  // Prune out obviously bad instructions to move. Any non-store instruction
-  // must have exactly one use, and we check later that use is by a single,
-  // common PHI instruction in the successor.
-  for (auto *I : Insts) {
-    // These instructions may change or break semantics if moved.
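
At the source level, the select insertion in the terminator-hoisting path above corresponds to the following equivalence (editorial sketch with invented values, not the pass itself): both incoming computations become available before the merged terminator, and the PHI collapses to a select on the original branch condition.

    #include <cassert>

    // Before: a PHI merges the two blocks' disagreeing values.
    static int before(bool c, int x, int y) {
      int phi;
      if (c)
        phi = x + 1; // BB1's incoming value
      else
        phi = y + 1; // BB2's incoming value
      return phi;
    }

    // After: both values are computed, and a select on the branch
    // condition picks the right one.
    static int after(bool c, int x, int y) {
      int bb1v = x + 1, bb2v = y + 1;
      return c ? bb1v : bb2v;
    }

    int main() {
      for (int x = -2; x <= 2; ++x)
        for (int y = -2; y <= 2; ++y) {
          assert(before(true, x, y) == after(true, x, y));
          assert(before(false, x, y) == after(false, x, y));
        }
      return 0;
    }
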
- if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || - I->getType()->isTokenTy()) - return false; - - // Conservatively return false if I is an inline-asm instruction. Sinking - // and merging inline-asm instructions can potentially create arguments - // that cannot satisfy the inline-asm constraints. - if (const auto *C = dyn_cast<CallBase>(I)) - if (C->isInlineAsm()) - return false; - - // Everything must have only one use too, apart from stores which - // have no uses. - if (!isa<StoreInst>(I) && !I->hasOneUse()) - return false; - } - - const Instruction *I0 = Insts.front(); - for (auto *I : Insts) - if (!I->isSameOperationAs(I0)) - return false; - - // All instructions in Insts are known to be the same opcode. If they aren't - // stores, check the only user of each is a PHI or in the same block as the - // instruction, because if a user is in the same block as an instruction - // we're contemplating sinking, it must already be determined to be sinkable. - if (!isa<StoreInst>(I0)) { - auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); - auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); - if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { - auto *U = cast<Instruction>(*I->user_begin()); - return (PNUse && - PNUse->getParent() == Succ && - PNUse->getIncomingValueForBlock(I->getParent()) == I) || - U->getParent() == I->getParent(); - })) - return false; - } - - // Because SROA can't handle speculating stores of selects, try not - // to sink loads or stores of allocas when we'd have to create a PHI for - // the address operand. Also, because it is likely that loads or stores - // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. - // This can cause code churn which can have unintended consequences down - // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. - // FIXME: This is a workaround for a deficiency in SROA - see - // https://llvm.org/bugs/show_bug.cgi?id=30188 - if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(1)); - })) - return false; - if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(0)); - })) - return false; - - for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { - if (I0->getOperand(OI)->getType()->isTokenTy()) - // Don't touch any operand of token type. - return false; - - auto SameAsI0 = [&I0, OI](const Instruction *I) { - assert(I->getNumOperands() == I0->getNumOperands()); - return I->getOperand(OI) == I0->getOperand(OI); - }; - if (!all_of(Insts, SameAsI0)) { - if (!canReplaceOperandWithVariable(I0, OI)) - // We can't create a PHI from this GEP. - return false; - // Don't create indirect calls! The called value is the final operand. - if (isa<CallBase>(I0) && OI == OE - 1) { - // FIXME: if the call was *already* indirect, we should do this. - return false; - } - for (auto *I : Insts) - PHIOperands[I].push_back(I->getOperand(OI)); - } - } - return true; -} - -// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last -// instruction of every block in Blocks to their common successor, commoning -// into one instruction. -static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { - auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); - - // canSinkLastInstruction returning true guarantees that every block has at - // least one non-terminator instruction. 
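
For the operand bookkeeping above (and the sinking performed below), the per-operand decision is simply whether every block supplies the same value; if not, a PHI in the successor is required to merge them. A compact editorial sketch with invented values:

    #include <cstdio>
    #include <vector>

    int main() {
      // Operand OI of the sinking candidates, one entry per predecessor
      // block (mirrors the PHIOperands collection above).
      std::vector<int> OperandInEachBlock = {10, 10, 11};

      // If every block agrees, the sunk instruction reuses the value
      // directly; otherwise the successor needs a PHI for this operand.
      bool NeedPHI = false;
      for (int V : OperandInEachBlock)
        NeedPHI |= (V != OperandInEachBlock.front());

      std::printf(NeedPHI ? "create a PHI for this operand\n"
                          : "reuse the common operand\n");
      return 0;
    }
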
- SmallVector<Instruction*,4> Insts; - for (auto *BB : Blocks) { - Instruction *I = BB->getTerminator(); - do { - I = I->getPrevNode(); - } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front()); - if (!isa<DbgInfoIntrinsic>(I)) - Insts.push_back(I); - } - - // The only checking we need to do now is that all users of all instructions - // are the same PHI node. canSinkLastInstruction should have checked this but - // it is slightly over-aggressive - it gets confused by commutative instructions - // so double-check it here. - Instruction *I0 = Insts.front(); - if (!isa<StoreInst>(I0)) { - auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); - if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { - auto *U = cast<Instruction>(*I->user_begin()); - return U == PNUse; - })) - return false; - } - - // We don't need to do any more checking here; canSinkLastInstruction should - // have done it all for us. - SmallVector<Value*, 4> NewOperands; - for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - // This check is different to that in canSinkLastInstruction. There, we - // cared about the global view once simplifycfg (and instcombine) have - // completed - it takes into account PHIs that become trivially - // simplifiable. However here we need a more local view; if an operand - // differs we create a PHI and rely on instcombine to clean up the very - // small mess we may make. - bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { - return I->getOperand(O) != I0->getOperand(O); - }); - if (!NeedPHI) { - NewOperands.push_back(I0->getOperand(O)); - continue; - } - - // Create a new PHI in the successor block and populate it. - auto *Op = I0->getOperand(O); - assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); - auto *PN = PHINode::Create(Op->getType(), Insts.size(), - Op->getName() + ".sink", &BBEnd->front()); - for (auto *I : Insts) - PN->addIncoming(I->getOperand(O), I->getParent()); - NewOperands.push_back(PN); - } - - // Arbitrarily use I0 as the new "common" instruction; remap its operands - // and move it to the start of the successor block. - for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) - I0->getOperandUse(O).set(NewOperands[O]); - I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - - // Update metadata and IR flags, and merge debug locations. - for (auto *I : Insts) - if (I != I0) { - // The debug location for the "common" instruction is the merged locations - // of all the commoned instructions. We start with the original location - // of the "common" instruction and iteratively merge each location in the - // loop below. - // This is an N-way merge, which will be inefficient if I0 is a CallInst. - // However, as N-way merge for CallInst is rare, so we use simplified API - // instead of using complex API for N-way merge. - I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); - combineMetadataForCSE(I0, I, true); - I0->andIRFlags(I); - } - - if (!isa<StoreInst>(I0)) { - // canSinkLastInstruction checked that all instructions were used by - // one and only one PHI node. Find that now, RAUW it to our common - // instruction and nuke it. - assert(I0->hasOneUse()); - auto *PN = cast<PHINode>(*I0->user_begin()); - PN->replaceAllUsesWith(I0); - PN->eraseFromParent(); - } - - // Finally nuke all instructions apart from the common instruction. 
- for (auto *I : Insts) - if (I != I0) - I->eraseFromParent(); - - return true; -} - -namespace { - - // LockstepReverseIterator - Iterates through instructions - // in a set of blocks in reverse order from the first non-terminator. - // For example (assume all blocks have size n): - // LockstepReverseIterator I([B1, B2, B3]); - // *I-- = [B1[n], B2[n], B3[n]]; - // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; - // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; - // ... - class LockstepReverseIterator { - ArrayRef<BasicBlock*> Blocks; - SmallVector<Instruction*,4> Insts; - bool Fail; - - public: - LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { - reset(); - } - - void reset() { - Fail = false; - Insts.clear(); - for (auto *BB : Blocks) { - Instruction *Inst = BB->getTerminator(); - for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) - Inst = Inst->getPrevNode(); - if (!Inst) { - // Block wasn't big enough. - Fail = true; - return; - } - Insts.push_back(Inst); - } - } - - bool isValid() const { - return !Fail; - } - - void operator--() { - if (Fail) - return; - for (auto *&Inst : Insts) { - for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);) - Inst = Inst->getPrevNode(); - // Already at beginning of block. - if (!Inst) { - Fail = true; - return; - } - } - } - - ArrayRef<Instruction*> operator * () const { - return Insts; - } - }; - -} // end anonymous namespace - -/// Check whether BB's predecessors end with unconditional branches. If it is -/// true, sink any common code from the predecessors to BB. -/// We also allow one predecessor to end with conditional branch (but no more -/// than one). -static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { - // We support two situations: - // (1) all incoming arcs are unconditional - // (2) one incoming arc is conditional - // - // (2) is very common in switch defaults and - // else-if patterns; - // - // if (a) f(1); - // else if (b) f(2); - // - // produces: - // - // [if] - // / \ - // [f(1)] [if] - // | | \ - // | | | - // | [f(2)]| - // \ | / - // [ end ] - // - // [end] has two unconditional predecessor arcs and one conditional. The - // conditional refers to the implicit empty 'else' arc. This conditional - // arc can also be caused by an empty default block in a switch. - // - // In this case, we attempt to sink code from all *unconditional* arcs. - // If we can sink instructions from these arcs (determined during the scan - // phase below) we insert a common successor for all unconditional arcs and - // connect that to [end], to enable sinking: - // - // [if] - // / \ - // [x(1)] [if] - // | | \ - // | | \ - // | [x(2)] | - // \ / | - // [sink.split] | - // \ / - // [ end ] - // - SmallVector<BasicBlock*,4> UnconditionalPreds; - Instruction *Cond = nullptr; - for (auto *B : predecessors(BB)) { - auto *T = B->getTerminator(); - if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional()) - UnconditionalPreds.push_back(B); - else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond) - Cond = T; - else - return false; - } - if (UnconditionalPreds.size() < 2) - return false; - - bool Changed = false; - // We take a two-step approach to tail sinking. First we scan from the end of - // each block upwards in lockstep. If the n'th instruction from the end of each - // block can be sunk, those instructions are added to ValuesToSink and we - // carry on. 
If we can sink an instruction but need to PHI-merge some operands
-  // (because they're not identical in each instruction) we add these to
-  // PHIOperands.
-  unsigned ScanIdx = 0;
-  SmallPtrSet<Value*,4> InstructionsToSink;
-  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
-  LockstepReverseIterator LRI(UnconditionalPreds);
-  while (LRI.isValid() &&
-         canSinkInstructions(*LRI, PHIOperands)) {
-    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
-                      << "\n");
-    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
-    ++ScanIdx;
-    --LRI;
-  }
-
-  auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
-    unsigned NumPHIdValues = 0;
-    for (auto *I : *LRI)
-      for (auto *V : PHIOperands[I])
-        if (InstructionsToSink.count(V) == 0)
-          ++NumPHIdValues;
-    LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
-    unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
-    if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
-      NumPHIInsts++;
-
-    return NumPHIInsts <= 1;
-  };
-
-  if (ScanIdx > 0 && Cond) {
-    // Check if we would actually sink anything first! This mutates the CFG and
-    // adds an extra block. The goal in doing this is to allow instructions that
-    // couldn't be sunk before to be sunk - obviously, speculatable instructions
-    // (such as trunc, add) can be sunk and predicated already. So we check that
-    // we're going to sink at least one non-speculatable instruction.
-    LRI.reset();
-    unsigned Idx = 0;
-    bool Profitable = false;
-    while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
-      if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
-        Profitable = true;
-        break;
-      }
-      --LRI;
-      ++Idx;
-    }
-    if (!Profitable)
-      return false;
-
-    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
-    // We have a conditional edge and we're going to sink some instructions.
-    // Insert a new block postdominating all blocks we're going to sink from.
-    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
-      // Edges couldn't be split.
-      return false;
-    Changed = true;
-  }
-
-  // Now that we've analyzed all potential sinking candidates, perform the
-  // actual sink. We iteratively sink the last non-terminator of the source
-  // blocks into their common successor unless doing so would require too
-  // many PHI instructions to be generated (currently only one PHI is allowed
-  // per sunk instruction).
-  //
-  // We can use InstructionsToSink to discount values needing PHI-merging that
-  // will actually be sunk in a later iteration. This allows us to be more
-  // aggressive in what we sink. It can produce a false positive: we may
-  // presume a later value will be sunk, stop halfway through, and never
-  // actually sink it, producing more PHIs than intended. This is unlikely in
-  // practice though.
-  for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
-    LLVM_DEBUG(dbgs() << "SINK: Sink: "
-                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
-                      << "\n");
-
-    // Because we've sunk every instruction in turn, the current instruction to
-    // sink is always at index 0.
-    LRI.reset();
-    if (!ProfitableToSinkInstruction(LRI)) {
-      // Too many PHIs would be created.
-      LLVM_DEBUG(
-          dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
-      break;
-    }
-
-    if (!sinkLastInstruction(UnconditionalPreds))
-      return Changed;
-    NumSinkCommons++;
-    Changed = true;
-  }
-  return Changed;
-}
-
-/// Determine if we can hoist a sole store instruction out of a
-/// conditional block.
-///
-/// We are looking for code like the following:
-///   BrBB:
-///     store i32 %add, i32* %arrayidx2
-///     ... // No other stores or function calls (we could be calling a memory
-///     ... // function).
-///     %cmp = icmp ult %x, %y
-///     br i1 %cmp, label %EndBB, label %ThenBB
-///   ThenBB:
-///     store i32 %add5, i32* %arrayidx2
-///     br label %EndBB
-///   EndBB:
-///     ...
-///   We are going to transform this into:
-///   BrBB:
-///     store i32 %add, i32* %arrayidx2
-///     ... //
-///     %cmp = icmp ult %x, %y
-///     %add.add5 = select i1 %cmp, i32 %add, %add5
-///     store i32 %add.add5, i32* %arrayidx2
-///     ...
-///
-/// \return The pointer to the value of the previous store if the store can be
-/// hoisted into the predecessor block. 0 otherwise.
-static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
-                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
-  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
-  if (!StoreToHoist)
-    return nullptr;
-
-  // Volatile or atomic.
-  if (!StoreToHoist->isSimple())
-    return nullptr;
-
-  Value *StorePtr = StoreToHoist->getPointerOperand();
-
-  // Look for a store to the same pointer in BrBB.
-  unsigned MaxNumInstToLookAt = 9;
-  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
-    if (!MaxNumInstToLookAt)
-      break;
-    --MaxNumInstToLookAt;
-
-    // Could be calling an instruction that affects memory like free().
-    if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
-      return nullptr;
-
-    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
-      // Found the previous store; make sure it stores to the same location.
-      if (SI->getPointerOperand() == StorePtr)
-        // Found the previous store, return its value operand.
-        return SI->getValueOperand();
-      return nullptr; // Unknown store.
-    }
-  }
-
-  return nullptr;
-}
-
-/// Speculate a conditional basic block, flattening the CFG.
-///
-/// Note that this is a very risky transform currently. Speculating
-/// instructions like this is most often not desirable. Instead, there is an MI
-/// pass which can do it with full awareness of the resource constraints.
-/// However, some cases are "obvious" and we should handle directly. An example
-/// of this is speculating a single, reasonably cheap instruction.
-///
-/// There is only one distinct advantage to flattening the CFG at the IR level:
-/// it makes very common but simplistic optimizations, such as those in
-/// instcombine and the DAG combiner, more powerful by removing CFG edges and
-/// modeling their effects with easier-to-reason-about SSA value graphs.
-///
-/// An illustration of this transform is turning this IR:
-/// \code
-///   BB:
-///     %cmp = icmp ult %x, %y
-///     br i1 %cmp, label %EndBB, label %ThenBB
-///   ThenBB:
-///     %sub = sub %x, %y
-///     br label %EndBB
-///   EndBB:
-///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
-///     ...
-/// \endcode
-///
-/// Into this IR:
-/// \code
-///   BB:
-///     %cmp = icmp ult %x, %y
-///     %sub = sub %x, %y
-///     %cond = select i1 %cmp, 0, %sub
-///     ...
-/// \endcode
-///
-/// \returns true if the conditional block is removed.
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
-                                   const TargetTransformInfo &TTI) {
-  // Be conservative for now. FP select instructions can often be expensive.
-  Value *BrCond = BI->getCondition();
-  if (isa<FCmpInst>(BrCond))
-    return false;
-
-  BasicBlock *BB = BI->getParent();
-  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
-
-  // If ThenBB is actually on the false edge of the conditional branch, remember
-  // to swap the select operands later.
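
A source-level picture of the store-speculation rewrite documented above; it is legal precisely because the earlier unconditional store proves the location is written on both paths. An editorial sketch with invented names:

    #include <cassert>

    // Before: ThenBB (taken when cmp is false) conditionally overwrites a
    // location that BrBB already stored to.
    static void before(bool cmp, int add, int add5, int *slot) {
      *slot = add;
      if (!cmp)
        *slot = add5; // ThenBB's store to the same location
    }

    // After: the store always executes; a select picks which value lands.
    static void after(bool cmp, int add, int add5, int *slot) {
      *slot = add;
      *slot = cmp ? add : add5; // mirrors "%add.add5 = select i1 %cmp, ..."
    }

    int main() {
      int a = 0, b = 0;
      before(true, 1, 2, &a);  after(true, 1, 2, &b);  assert(a == b);
      before(false, 1, 2, &a); after(false, 1, 2, &b); assert(a == b);
      return 0;
    }
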
- bool Invert = false; - if (ThenBB != BI->getSuccessor(0)) { - assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); - Invert = true; - } - assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); - - // Keep a count of how many times instructions are used within ThenBB when - // they are candidates for sinking into ThenBB. Specifically: - // - They are defined in BB, and - // - They have no side effects, and - // - All of their uses are in ThenBB. - SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; - - SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics; - - unsigned SpeculationCost = 0; - Value *SpeculatedStoreValue = nullptr; - StoreInst *SpeculatedStore = nullptr; - for (BasicBlock::iterator BBI = ThenBB->begin(), - BBE = std::prev(ThenBB->end()); - BBI != BBE; ++BBI) { - Instruction *I = &*BBI; - // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) { - SpeculatedDbgIntrinsics.push_back(I); - continue; - } - - // Only speculatively execute a single instruction (not counting the - // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) - return false; - - // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && - !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( - I, BB, ThenBB, EndBB)))) - return false; - if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, TTI) > - PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) - return false; - - // Store the store speculation candidate. - if (SpeculatedStoreValue) - SpeculatedStore = cast<StoreInst>(I); - - // Do not hoist the instruction if any of its operands are defined but not - // used in BB. The transformation will prevent the operand from - // being sunk into the use block. - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { - Instruction *OpI = dyn_cast<Instruction>(*i); - if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) - continue; // Not a candidate for sinking. - - ++SinkCandidateUseCounts[OpI]; - } - } - - // Consider any sink candidates which are only used in ThenBB as costs for - // speculation. Note, while we iterate over a DenseMap here, we are summing - // and so iteration order isn't significant. - for (SmallDenseMap<Instruction *, unsigned, 4>::iterator - I = SinkCandidateUseCounts.begin(), - E = SinkCandidateUseCounts.end(); - I != E; ++I) - if (I->first->hasNUses(I->second)) { - ++SpeculationCost; - if (SpeculationCost > 1) - return false; - } - - // Check that the PHI nodes can be converted to selects. - bool HaveRewritablePHIs = false; - for (PHINode &PN : EndBB->phis()) { - Value *OrigV = PN.getIncomingValueForBlock(BB); - Value *ThenV = PN.getIncomingValueForBlock(ThenBB); - - // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. - // Skip PHIs which are trivial. - if (ThenV == OrigV) - continue; - - // Don't convert to selects if we could remove undefined behavior instead. - if (passingValueIsAlwaysUndefined(OrigV, &PN) || - passingValueIsAlwaysUndefined(ThenV, &PN)) - return false; - - HaveRewritablePHIs = true; - ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); - ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); - if (!OrigCE && !ThenCE) - continue; // Known safe and cheap. - - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) - return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; - unsigned ThenCost = ThenCE ? 
ComputeSpeculationCost(ThenCE, TTI) : 0; - unsigned MaxCost = - 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - if (OrigCost + ThenCost > MaxCost) - return false; - - // Account for the cost of an unfolded ConstantExpr which could end up - // getting expanded into Instructions. - // FIXME: This doesn't account for how many operations are combined in the - // constant expression. - ++SpeculationCost; - if (SpeculationCost > 1) - return false; - } - - // If there are no PHIs to process, bail early. This helps ensure idempotence - // as well. - if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) - return false; - - // If we get here, we can hoist the instruction and if-convert. - LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); - - // Insert a select of the value of the speculated store. - if (SpeculatedStoreValue) { - IRBuilder<NoFolder> Builder(BI); - Value *TrueV = SpeculatedStore->getValueOperand(); - Value *FalseV = SpeculatedStoreValue; - if (Invert) - std::swap(TrueV, FalseV); - Value *S = Builder.CreateSelect( - BrCond, TrueV, FalseV, "spec.store.select", BI); - SpeculatedStore->setOperand(0, S); - SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), - SpeculatedStore->getDebugLoc()); - } - - // Metadata can be dependent on the condition we are hoisting above. - // Conservatively strip all metadata on the instruction. - for (auto &I : *ThenBB) - I.dropUnknownNonDebugMetadata(); - - // Hoist the instructions. - BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), - ThenBB->begin(), std::prev(ThenBB->end())); - - // Insert selects and rewrite the PHI operands. - IRBuilder<NoFolder> Builder(BI); - for (PHINode &PN : EndBB->phis()) { - unsigned OrigI = PN.getBasicBlockIndex(BB); - unsigned ThenI = PN.getBasicBlockIndex(ThenBB); - Value *OrigV = PN.getIncomingValue(OrigI); - Value *ThenV = PN.getIncomingValue(ThenI); - - // Skip PHIs which are trivial. - if (OrigV == ThenV) - continue; - - // Create a select whose true value is the speculatively executed value and - // false value is the preexisting value. Swap them if the branch - // destinations were inverted. - Value *TrueV = ThenV, *FalseV = OrigV; - if (Invert) - std::swap(TrueV, FalseV); - Value *V = Builder.CreateSelect( - BrCond, TrueV, FalseV, "spec.select", BI); - PN.setIncomingValue(OrigI, V); - PN.setIncomingValue(ThenI, V); - } - - // Remove speculated dbg intrinsics. - // FIXME: Is it possible to do this in a more elegant way? Moving/merging the - // dbg value for the different flows and inserting it after the select. - for (Instruction *I : SpeculatedDbgIntrinsics) - I->eraseFromParent(); - - ++NumSpeculations; - return true; -} - -/// Return true if we can thread a branch across this block. -static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { - unsigned Size = 0; - - for (Instruction &I : BB->instructionsWithoutDebug()) { - if (Size > 10) - return false; // Don't clone large BB's. - ++Size; - - // We can only support instructions that do not define values that are - // live outside of the current basic block. - for (User *U : I.users()) { - Instruction *UI = cast<Instruction>(U); - if (UI->getParent() != BB || isa<PHINode>(UI)) - return false; - } - - // Looks ok, continue checking. 
- } - - return true; -} - -/// If we have a conditional branch on a PHI node value that is defined in the -/// same block as the branch and if any PHI entries are constants, thread edges -/// corresponding to that entry to be branches to their ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, - AssumptionCache *AC) { - BasicBlock *BB = BI->getParent(); - PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); - // NOTE: we currently cannot transform this case if the PHI node is used - // outside of the block. - if (!PN || PN->getParent() != BB || !PN->hasOneUse()) - return false; - - // Degenerate case of a single entry PHI. - if (PN->getNumIncomingValues() == 1) { - FoldSingleEntryPHINodes(PN->getParent()); - return true; - } - - // Now we know that this block has multiple preds and two succs. - if (!BlockIsSimpleEnoughToThreadThrough(BB)) - return false; - - // Can't fold blocks that contain noduplicate or convergent calls. - if (any_of(*BB, [](const Instruction &I) { - const CallInst *CI = dyn_cast<CallInst>(&I); - return CI && (CI->cannotDuplicate() || CI->isConvergent()); - })) - return false; - - // Okay, this is a simple enough basic block. See if any phi values are - // constants. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (!CB || !CB->getType()->isIntegerTy(1)) - continue; - - // Okay, we now know that all edges from PredBB should be revectored to - // branch to RealDest. - BasicBlock *PredBB = PN->getIncomingBlock(i); - BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - - if (RealDest == BB) - continue; // Skip self loops. - // Skip if the predecessor's terminator is an indirect branch. - if (isa<IndirectBrInst>(PredBB->getTerminator())) - continue; - - // The dest block might have PHI nodes, other predecessors and other - // difficult cases. Instead of being smart about this, just insert a new - // block that jumps to the destination block, effectively splitting - // the edge we are about to create. - BasicBlock *EdgeBB = - BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", - RealDest->getParent(), RealDest); - BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); - CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); - - // Update PHI nodes. - AddPredecessorToBlock(RealDest, EdgeBB, BB); - - // BB may have instructions that are being threaded over. Clone these - // instructions into EdgeBB. We know that there will be no uses of the - // cloned instructions outside of EdgeBB. - BasicBlock::iterator InsertPt = EdgeBB->begin(); - DenseMap<Value *, Value *> TranslateMap; // Track translated values. - for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { - if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); - continue; - } - // Clone the instruction. - Instruction *N = BBI->clone(); - if (BBI->hasName()) - N->setName(BBI->getName() + ".c"); - - // Update operands due to translation. - for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { - DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i); - if (PI != TranslateMap.end()) - *i = PI->second; - } - - // Check for trivial simplification. 
- if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { - if (!BBI->use_empty()) - TranslateMap[&*BBI] = V; - if (!N->mayHaveSideEffects()) { - N->deleteValue(); // Instruction folded away, don't need actual inst - N = nullptr; - } - } else { - if (!BBI->use_empty()) - TranslateMap[&*BBI] = N; - } - // Insert the new instruction into its new home. - if (N) - EdgeBB->getInstList().insert(InsertPt, N); - - // Register the new instruction with the assumption cache if necessary. - if (auto *II = dyn_cast_or_null<IntrinsicInst>(N)) - if (II->getIntrinsicID() == Intrinsic::assume) - AC->registerAssumption(II); - } - - // Loop over all of the edges from PredBB to BB, changing them to branch - // to EdgeBB instead. - Instruction *PredBBTI = PredBB->getTerminator(); - for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) - if (PredBBTI->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB); - PredBBTI->setSuccessor(i, EdgeBB); - } - - // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DL, AC) || true; - } - - return false; -} - -/// Given a BB that starts with the specified two-entry PHI node, -/// see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - const DataLayout &DL) { - // Ok, this is a two entry PHI node. Check to see if this is a simple "if - // statement", which has a very simple dominance structure. Basically, we - // are trying to find the condition that is being branched on, which - // subsequently causes this merge to happen. We really want control - // dependence information for this check, but simplifycfg can't keep it up - // to date, and this catches most of the cases we care about anyway. - BasicBlock *BB = PN->getParent(); - const Function *Fn = BB->getParent(); - if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) - return false; - - BasicBlock *IfTrue, *IfFalse; - Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); - if (!IfCond || - // Don't bother if the branch will be constant folded trivially. - isa<ConstantInt>(IfCond)) - return false; - - // Okay, we found that we can merge this two-entry phi node into a select. - // Doing so would require us to fold *all* two entry phi nodes in this block. - // At some point this becomes non-profitable (particularly if the target - // doesn't support cmov's). Only do this transformation if there are two or - // fewer PHI nodes in this block. - unsigned NumPhis = 0; - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) - if (NumPhis > 2) - return false; - - // Loop over the PHI's seeing if we can promote them all to select - // instructions. While we are at it, keep track of the instructions - // that need to be moved to the dominating block. - SmallPtrSet<Instruction *, 4> AggressiveInsts; - unsigned MaxCostVal0 = PHINodeFoldingThreshold, - MaxCostVal1 = PHINodeFoldingThreshold; - MaxCostVal0 *= TargetTransformInfo::TCC_Basic; - MaxCostVal1 *= TargetTransformInfo::TCC_Basic; - - for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { - PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, {DL, PN})) { - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - continue; - } - - if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - MaxCostVal0, TTI) || - !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - MaxCostVal1, TTI)) - return false; - } - - // If we folded the first phi, PN dangles at this point. Refresh it. 
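The overall rewrite this function is building toward, as a small sketch with invented names: a two-entry PHI fed by a simple if-diamond or triangle collapses into a select in the dominating block.

  ; before
  entry:
    br i1 %c, label %if.then, label %if.end
  if.then:
    br label %if.end
  if.end:
    %x = phi i32 [ 1, %if.then ], [ 2, %entry ]

  ; after
  entry:
    %x = select i1 %c, i32 1, i32 2
    br label %if.end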
If - // we ran out of PHIs then we simplified them all. - PN = dyn_cast<PHINode>(BB->begin()); - if (!PN) - return true; - - // Don't fold i1 branches on PHIs which contain binary operators. These can - // often be turned into switches and other things. - if (PN->getType()->isIntegerTy(1) && - (isa<BinaryOperator>(PN->getIncomingValue(0)) || - isa<BinaryOperator>(PN->getIncomingValue(1)) || - isa<BinaryOperator>(IfCond))) - return false; - - // If all PHI nodes are promotable, check to make sure that all instructions - // in the predecessor blocks can be promoted as well. If not, we won't be able - // to get rid of the control flow, so it's not worth promoting to select - // instructions. - BasicBlock *DomBlock = nullptr; - BasicBlock *IfBlock1 = PN->getIncomingBlock(0); - BasicBlock *IfBlock2 = PN->getIncomingBlock(1); - if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = nullptr; - } else { - DomBlock = *pred_begin(IfBlock1); - for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { - // This is not an aggressive instruction that we can promote. - // Because of this, we won't be able to get rid of the control flow, so - // the xform is not worth it. - return false; - } - } - - if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = nullptr; - } else { - DomBlock = *pred_begin(IfBlock2); - for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I) - if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { - // This is not an aggressive instruction that we can promote. - // Because of this, we won't be able to get rid of the control flow, so - // the xform is not worth it. - return false; - } - } - - LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond - << " T: " << IfTrue->getName() - << " F: " << IfFalse->getName() << "\n"); - - // If we can still promote the PHI nodes after this gauntlet of tests, - // do all of the PHI's now. - Instruction *InsertPt = DomBlock->getTerminator(); - IRBuilder<NoFolder> Builder(InsertPt); - - // Move all 'aggressive' instructions, which are defined in the - // conditional parts of the if's up to the dominating block. - if (IfBlock1) - hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1); - if (IfBlock2) - hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2); - - while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { - // Change the PHI node into a select instruction. - Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); - Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt); - PN->replaceAllUsesWith(Sel); - Sel->takeName(PN); - PN->eraseFromParent(); - } - - // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement - // has been flattened. Change DomBlock to jump directly to our new block to - // avoid other simplifycfg's kicking in on the diamond. - Instruction *OldTI = DomBlock->getTerminator(); - Builder.SetInsertPoint(OldTI); - Builder.CreateBr(BB); - OldTI->eraseFromParent(); - return true; -} - -/// If we found a conditional branch that goes to two returning blocks, -/// try to merge them together into one return, -/// introducing a select if the return values disagree. 
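A small sketch of that fold (invented values); the select named "retval" is only needed when the two return values actually differ:

  ; before
  entry:
    br i1 %c, label %t, label %f
  t:
    ret i32 %a
  f:
    ret i32 %b

  ; after
  entry:
    %retval = select i1 %c, i32 %a, i32 %b
    ret i32 %retval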
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, - IRBuilder<> &Builder) { - assert(BI->isConditional() && "Must be a conditional branch"); - BasicBlock *TrueSucc = BI->getSuccessor(0); - BasicBlock *FalseSucc = BI->getSuccessor(1); - ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); - ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - - // Check to ensure both blocks are empty (just a return) or optionally empty - // with PHI nodes. If there are other instructions, merging would cause extra - // computation on one path or the other. - if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator()) - return false; - if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator()) - return false; - - Builder.SetInsertPoint(BI); - // Okay, we found a branch that is going to two return nodes. If - // there is no return value for this function, just change the - // branch into a return. - if (FalseRet->getNumOperands() == 0) { - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); - Builder.CreateRetVoid(); - EraseTerminatorAndDCECond(BI); - return true; - } - - // Otherwise, figure out what the true and false return values are - // so we can insert a new select instruction. - Value *TrueValue = TrueRet->getReturnValue(); - Value *FalseValue = FalseRet->getReturnValue(); - - // Unwrap any PHI nodes in the return blocks. - if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) - if (TVPN->getParent() == TrueSucc) - TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); - if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) - if (FVPN->getParent() == FalseSucc) - FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - - // In order for this transformation to be safe, we must be able to - // unconditionally execute both operands to the return. This is - // normally the case, but we could have a potentially-trapping - // constant expression that prevents this transformation from being - // safe. - if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue)) - if (TCV->canTrap()) - return false; - if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) - if (FCV->canTrap()) - return false; - - // Okay, we collected all the mapped values and checked them for sanity, and - // defined to really do this transformation. First, update the CFG. - TrueSucc->removePredecessor(BI->getParent()); - FalseSucc->removePredecessor(BI->getParent()); - - // Insert select instructions where needed. - Value *BrCond = BI->getCondition(); - if (TrueValue) { - // Insert a select if the results differ. - if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) { - } else if (isa<UndefValue>(TrueValue)) { - TrueValue = FalseValue; - } else { - TrueValue = - Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI); - } - } - - Value *RI = - !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); - - (void)RI; - - LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" - << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " - << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); - - EraseTerminatorAndDCECond(BI); - - return true; -} - -/// Return true if the given instruction is available -/// in its predecessor block. If yes, the instruction will be removed. 
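For example (hypothetical IR), a binary operator recomputed in the block can be replaced by the identical instruction already available in the lone predecessor:

  pred:
    %x = add i32 %a, %b
    br i1 %c, label %bb, label %exit
  bb:
    %y = add i32 %a, %b   ; identical to %x: uses of %y are replaced with %x and %y is erased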
-static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) { - if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) - return false; - for (Instruction &I : *PB) { - Instruction *PBI = &I; - // Check whether Inst and PBI generate the same value. - if (Inst->isIdenticalTo(PBI)) { - Inst->replaceAllUsesWith(PBI); - Inst->eraseFromParent(); - return true; - } - } - return false; -} - -/// Return true if either PBI or BI has branch weight available, and store -/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does -/// not have branch weight, use 1:1 as its weight. -static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, - uint64_t &PredTrueWeight, - uint64_t &PredFalseWeight, - uint64_t &SuccTrueWeight, - uint64_t &SuccFalseWeight) { - bool PredHasWeights = - PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight); - bool SuccHasWeights = - BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight); - if (PredHasWeights || SuccHasWeights) { - if (!PredHasWeights) - PredTrueWeight = PredFalseWeight = 1; - if (!SuccHasWeights) - SuccTrueWeight = SuccFalseWeight = 1; - return true; - } else { - return false; - } -} - -/// If this basic block is simple enough, and if a predecessor branches to us -/// and one of our successors, fold the block into the predecessor and use -/// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, - unsigned BonusInstThreshold) { - BasicBlock *BB = BI->getParent(); - - const unsigned PredCount = pred_size(BB); - - Instruction *Cond = nullptr; - if (BI->isConditional()) - Cond = dyn_cast<Instruction>(BI->getCondition()); - else { - // For unconditional branch, check for a simple CFG pattern, where - // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exists, check for CSE between BB and its - // predecessor. - if (BasicBlock *PB = BB->getSinglePredecessor()) - if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) - if (PBI->isConditional() && - (BI->getSuccessor(0) == PBI->getSuccessor(0) || - BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (auto I = BB->instructionsWithoutDebug().begin(), - E = BB->instructionsWithoutDebug().end(); - I != E;) { - Instruction *Curr = &*I++; - if (isa<CmpInst>(Curr)) { - Cond = Curr; - break; - } - // Quit if we can't remove this instruction. - if (!tryCSEWithPredecessor(Curr, PB)) - return false; - } - } - - if (!Cond) - return false; - } - - if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) - return false; - - // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = ++Cond->getIterator(); - - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(CondIt)) - ++CondIt; - - if (&*CondIt != BI) - return false; - - // Only allow this transformation if computing the condition doesn't involve - // too many instructions and these involved instructions can be executed - // unconditionally. We denote all involved instructions except the condition - // as "bonus instructions", and only allow this transformation when the - // number of the bonus instructions we'll need to create when cloning into - // each predecessor does not exceed a certain threshold. - unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != &*I; ++I) { - // Ignore dbg intrinsics. 
- if (isa<DbgInfoIntrinsic>(I)) - continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) - return false; - // I has only one use and can be executed unconditionally. - Instruction *User = dyn_cast<Instruction>(I->user_back()); - if (User == nullptr || User->getParent() != BB) - return false; - // I is used in the same BB. Since BI uses Cond and doesn't have more slots - // to use any other instruction, User must be an instruction between next(I) - // and Cond. - - // Account for the cost of duplicating this instruction into each - // predecessor. - NumBonusInsts += PredCount; - // Early exits once we reach the limit. - if (NumBonusInsts > BonusInstThreshold) - return false; - } - - // Cond is known to be a compare or binary operator. Check to make sure that - // neither operand is a potentially-trapping constant expression. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0))) - if (CE->canTrap()) - return false; - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) - if (CE->canTrap()) - return false; - - // Finally, don't infinitely unroll conditional loops. - BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; - if (TrueDest == BB || FalseDest == BB) - return false; - - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *PredBlock = *PI; - BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - - // Check that we have two conditional branches. If there is a PHI node in - // the common successor, verify that the same value flows in from both - // blocks. - SmallVector<PHINode *, 4> PHIs; - if (!PBI || PBI->isUnconditional() || - (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || - (!BI->isConditional() && - !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) - continue; - - // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; - bool InvertPredCond = false; - - if (BI->isConditional()) { - if (PBI->getSuccessor(0) == TrueDest) { - Opc = Instruction::Or; - } else if (PBI->getSuccessor(1) == FalseDest) { - Opc = Instruction::And; - } else if (PBI->getSuccessor(0) == FalseDest) { - Opc = Instruction::And; - InvertPredCond = true; - } else if (PBI->getSuccessor(1) == TrueDest) { - Opc = Instruction::Or; - InvertPredCond = true; - } else { - continue; - } - } else { - if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) - continue; - } - - LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); - - // If we need to invert the condition in the pred block to match, do so now. - if (InvertPredCond) { - Value *NewCond = PBI->getCondition(); - - if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { - CmpInst *CI = cast<CmpInst>(NewCond); - CI->setPredicate(CI->getInversePredicate()); - } else { - NewCond = - Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); - } - - PBI->setCondition(NewCond); - PBI->swapSuccessors(); - } - - // If we have bonus instructions, clone them into the predecessor block. - // Note that there may be multiple predecessor blocks, so we cannot move - // bonus instructions to a predecessor block. - ValueToValueMapTy VMap; // maps original values to cloned values - // We already make sure Cond is the last instruction before BI. Therefore, - // all instructions before Cond other than DbgInfoIntrinsic are bonus - // instructions. 
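A condensed view of the four shared-destination cases above, for a conditional BI, writing PC for PBI's condition and C for the (cloned) condition of BI:

  PBI->getSuccessor(0) == TrueDest   ->  br i1 (PC | C),  TrueDest, FalseDest
  PBI->getSuccessor(1) == FalseDest  ->  br i1 (PC & C),  TrueDest, FalseDest
  PBI->getSuccessor(0) == FalseDest  ->  br i1 (!PC & C), TrueDest, FalseDest
  PBI->getSuccessor(1) == TrueDest   ->  br i1 (!PC | C), TrueDest, FalseDest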
- for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { - if (isa<DbgInfoIntrinsic>(BonusInst)) - continue; - Instruction *NewBonusInst = BonusInst->clone(); - RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&*BonusInst] = NewBonusInst; - - // If we moved a load, we cannot any longer claim any knowledge about - // its potential value. The previous information might have been valid - // only given the branch precondition. - // For an analogous reason, we must also drop all the metadata whose - // semantics we don't understand. - NewBonusInst->dropUnknownNonDebugMetadata(); - - PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&*BonusInst); - BonusInst->setName(BonusInst->getName() + ".old"); - } - - // Clone Cond into the predecessor basic block, and or/and the - // two conditions together. - Instruction *CondInPred = Cond->clone(); - RemapInstruction(CondInPred, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); - CondInPred->takeName(Cond); - Cond->setName(CondInPred->getName() + ".old"); - - if (BI->isConditional()) { - Instruction *NewCond = cast<Instruction>( - Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); - PBI->setCondition(NewCond); - - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool HasWeights = - extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight); - SmallVector<uint64_t, 8> NewWeights; - - if (PBI->getSuccessor(0) == BB) { - if (HasWeights) { - // PBI: br i1 %x, BB, FalseDest - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * TotalWeight for BI + - // TrueWeight for PBI * FalseWeight for BI. - // We assume that total weights of a BranchInst can fit into 32 bits. - // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); - } - AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - if (HasWeights) { - // PBI: br i1 %x, TrueDest, BB - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TotalWeight for BI + - // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); - } - AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(1, FalseDest); - } - if (NewWeights.size() == 2) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), - NewWeights.end()); - setBranchWeights(PBI, MDWeights[0], MDWeights[1]); - } else - PBI->setMetadata(LLVMContext::MD_prof, nullptr); - } else { - // Update PHI nodes in the common successors. 
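A worked instance of the PHI update performed below, assuming PBI's true successor is TrueDest and the PHI's incoming constant PBI_C is true: the merged value is PBI_Cond | (!PBI_Cond & BI_Value), which is logically PBI_Cond | BI_Value.

  PBI_Cond  BI_Value  merged
     1         x        1      ; old direct edge from PBI would have taken it
     0         1        1      ; BI's computed value flows through
     0         0        0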
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - ConstantInt *PBI_C = cast<ConstantInt>( - PHIs[i]->getIncomingValueForBlock(PBI->getParent())); - assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = nullptr; - if (PBI->getSuccessor(0) == TrueDest) { - // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) - // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) - // is false: !PBI_Cond and BI_Value - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>( - Builder.CreateBinOp(Instruction::And, NotCond, CondInPred, - "and.cond")); - if (PBI_C->isOne()) - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); - } else { - // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) - // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) - // is false: PBI_Cond and BI_Value - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::And, PBI->getCondition(), CondInPred, "and.cond")); - if (PBI_C->isOne()) { - Instruction *NotCond = cast<Instruction>( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast<Instruction>(Builder.CreateBinOp( - Instruction::Or, NotCond, MergedCond, "or.cond")); - } - } - // Update PHI Node. - PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); - } - - // PBI is changed to branch to TrueDest below. Remove itself from - // potential phis from all other successors. - if (MSSAU) - MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); - - // Change PBI from Conditional to Unconditional. - BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); - EraseTerminatorAndDCECond(PBI, MSSAU); - PBI = New_PBI; - } - - // If BI was a loop latch, it may have had associated loop metadata. - // We need to copy it to the new latch, that is, PBI. - if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) - PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - - // TODO: If BB is reachable from all paths through PredBlock, then we - // could replace PBI's branch probabilities with BI's. - - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) - if (isa<DbgInfoIntrinsic>(I)) - I.clone()->insertBefore(PBI); - - return true; - } - return false; -} - -// If there is only one store in BB1 and BB2, return it, otherwise return -// nullptr. -static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { - StoreInst *S = nullptr; - for (auto *BB : {BB1, BB2}) { - if (!BB) - continue; - for (auto &I : *BB) - if (auto *SI = dyn_cast<StoreInst>(&I)) { - if (S) - // Multiple stores seen. - return nullptr; - else - S = SI; - } - } - return S; -} - -static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, - Value *AlternativeV = nullptr) { - // PHI is going to be a PHI node that allows the value V that is defined in - // BB to be referenced in BB's only successor. - // - // If AlternativeV is nullptr, the only value we care about in PHI is V. It - // doesn't matter to us what the other operand is (it'll never get used). We - // could just create a new PHI with an undef incoming value, but that could - // increase register pressure if EarlyCSE/InstCombine can't fold it with some - // other PHI. So here we directly look for some PHI in BB's successor with V - // as an incoming operand. If we find one, we use it, else we create a new - // one. - // - // If AlternativeV is not nullptr, we care about both incoming values in PHI. 
- // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV] - // where OtherBB is the single other predecessor of BB's only successor. - PHINode *PHI = nullptr; - BasicBlock *Succ = BB->getSingleSuccessor(); - - for (auto I = Succ->begin(); isa<PHINode>(I); ++I) - if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) { - PHI = cast<PHINode>(I); - if (!AlternativeV) - break; - - assert(Succ->hasNPredecessors(2)); - auto PredI = pred_begin(Succ); - BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; - if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) - break; - PHI = nullptr; - } - if (PHI) - return PHI; - - // If V is not an instruction defined in BB, just return it. - if (!AlternativeV && - (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB)) - return V; - - PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); - PHI->addIncoming(V, BB); - for (BasicBlock *PredBB : predecessors(Succ)) - if (PredBB != BB) - PHI->addIncoming( - AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB); - return PHI; -} - -static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, - BasicBlock *QTB, BasicBlock *QFB, - BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond, - const DataLayout &DL) { - auto IsaBitcastOfPointerType = [](const Instruction &I) { - return Operator::getOpcode(&I) == Instruction::BitCast && - I.getType()->isPointerTy(); - }; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - unsigned N = 0; - for (auto &I : BB->instructionsWithoutDebug()) { - // Cheap instructions viable for folding. - if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) || - isa<StoreInst>(I)) - ++N; - // Free instructions. - else if (I.isTerminator() || IsaBitcastOfPointerType(I)) - continue; - else - return false; - } - // The store we want to merge is counted in N, so add 1 to make sure - // we're counting the instructions that would be left. - return N <= (PHINodeFoldingThreshold + 1); - }; - - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) - return false; - - // For every pointer, there must be exactly two stores, one coming from - // PTB or PFB, and the other from QTB or QFB. We don't support more than one - // store (to any address) in PTB,PFB or QTB,QFB. - // FIXME: We could relax this restriction with a bit more work and performance - // testing. - StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); - StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); - if (!PStore || !QStore) - return false; - - // Now check the stores are compatible. - if (!QStore->isUnordered() || !PStore->isUnordered()) - return false; - - // Check that sinking the store won't cause program behavior changes. Sinking - // the store out of the Q blocks won't change any behavior as we're sinking - // from a block to its unconditional successor. But we're moving a store from - // the P blocks down through the middle block (QBI) and past both QFB and QTB. - // So we need to check that there are no aliasing loads or stores in - // QBI, QTB and QFB. 
We also need to check there are no conflicting memory - // operations between PStore and the end of its parent block. - // - // The ideal way to do this is to query AliasAnalysis, but we don't - // preserve AA currently so that is dangerous. Be super safe and just - // check there are no other memory operations at all. - for (auto &I : *QFB->getSinglePredecessor()) - if (I.mayReadOrWriteMemory()) - return false; - for (auto &I : *QFB) - if (&I != QStore && I.mayReadOrWriteMemory()) - return false; - if (QTB) - for (auto &I : *QTB) - if (&I != QStore && I.mayReadOrWriteMemory()) - return false; - for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); - I != E; ++I) - if (&*I != PStore && I->mayReadOrWriteMemory()) - return false; - - // If PostBB has more than two predecessors, we need to split it so we can - // sink the store. - if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { - // We know that QFB's only successor is PostBB. And QFB has a single - // predecessor. If QTB exists, then its only successor is also PostBB. - // If QTB does not exist, then QFB's only predecessor has a conditional - // branch to QFB and PostBB. - BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); - BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, - "condstore.split"); - if (!NewBB) - return false; - PostBB = NewBB; - } - - // OK, we're going to sink the stores to PostBB. The store has to be - // conditional though, so first create the predicate. - Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); - Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); - - Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), - PStore->getParent()); - Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), - QStore->getParent(), PPHI); - - IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); - - Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); - Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); - - if (InvertPCond) - PPred = QB.CreateNot(PPred); - if (InvertQCond) - QPred = QB.CreateNot(QPred); - Value *CombinedPred = QB.CreateOr(PPred, QPred); - - auto *T = - SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); - QB.SetInsertPoint(T); - StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address)); - AAMDNodes AAMD; - PStore->getAAMetadata(AAMD, /*Merge=*/false); - PStore->getAAMetadata(AAMD, /*Merge=*/true); - SI->setAAMetadata(AAMD); - unsigned PAlignment = PStore->getAlignment(); - unsigned QAlignment = QStore->getAlignment(); - unsigned TypeAlignment = - DL.getABITypeAlignment(SI->getValueOperand()->getType()); - unsigned MinAlignment; - unsigned MaxAlignment; - std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment); - // Choose the minimum alignment. If we could prove both stores execute, we - // could use biggest one. In this case, though, we only know that one of the - // stores executes. And we don't know it's safe to take the alignment from a - // store that doesn't execute. - if (MinAlignment != 0) { - // Choose the minimum of all non-zero alignments. - SI->setAlignment(MinAlignment); - } else if (MaxAlignment != 0) { - // Choose the minimal alignment between the non-zero alignment and the ABI - // default alignment for the type of the stored value. 
- SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); - } else { - // If both alignments are zero, use ABI default alignment for the type of - // the stored value. - SI->setAlignment(TypeAlignment); - } - - QStore->eraseFromParent(); - PStore->eraseFromParent(); - - return true; -} - -static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL) { - // The intention here is to find diamonds or triangles (see below) where each - // conditional block contains a store to the same address. Both of these - // stores are conditional, so they can't be unconditionally sunk. But it may - // be profitable to speculatively sink the stores into one merged store at the - // end, and predicate the merged store on the union of the two conditions of - // PBI and QBI. - // - // This can reduce the number of stores executed if both of the conditions are - // true, and can allow the blocks to become small enough to be if-converted. - // This optimization will also chain, so that ladders of test-and-set - // sequences can be if-converted away. - // - // We only deal with simple diamonds or triangles: - // - // PBI or PBI or a combination of the two - // / \ | \ - // PTB PFB | PFB - // \ / | / - // QBI QBI - // / \ | \ - // QTB QFB | QFB - // \ / | / - // PostBB PostBB - // - // We model triangles as a type of diamond with a nullptr "true" block. - // Triangles are canonicalized so that the fallthrough edge is represented by - // a true condition, as in the diagram above. - BasicBlock *PTB = PBI->getSuccessor(0); - BasicBlock *PFB = PBI->getSuccessor(1); - BasicBlock *QTB = QBI->getSuccessor(0); - BasicBlock *QFB = QBI->getSuccessor(1); - BasicBlock *PostBB = QFB->getSingleSuccessor(); - - // Make sure we have a good guess for PostBB. If QTB's only successor is - // QFB, then QFB is a better PostBB. - if (QTB->getSingleSuccessor() == QFB) - PostBB = QFB; - - // If we couldn't find a good PostBB, stop. - if (!PostBB) - return false; - - bool InvertPCond = false, InvertQCond = false; - // Canonicalize fallthroughs to the true branches. - if (PFB == QBI->getParent()) { - std::swap(PFB, PTB); - InvertPCond = true; - } - if (QFB == PostBB) { - std::swap(QFB, QTB); - InvertQCond = true; - } - - // From this point on we can assume PTB or QTB may be fallthroughs but PFB - // and QFB may not. Model fallthroughs as a nullptr block. - if (PTB == QBI->getParent()) - PTB = nullptr; - if (QTB == PostBB) - QTB = nullptr; - - // Legality bailouts. We must have at least the non-fallthrough blocks and - // the post-dominating block, and the non-fallthroughs must only have one - // predecessor. - auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { - return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; - }; - if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || - !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) - return false; - if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || - (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) - return false; - if (!QBI->getParent()->hasNUses(2)) - return false; - - // OK, this is a sequence of two diamonds or triangles. - // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. 
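Sketching the end result for a triangle-shaped instance (invented names): both conditional stores disappear, the helper above builds a "simplifycfg.merge" PHI carrying whichever value would have been stored, and a single store runs in a new block guarded by the combined predicate (with each condition negated as needed so it is true exactly when its store would have executed).

  ; before: store in the true arm of %p and of %q, both to %addr
  ; after (conceptually)
  post:
    %merged = phi i32 ...            ; value the surviving path would have stored
    %do.store = or i1 %pc, %qc
    br i1 %do.store, label %store.bb, label %cont
  store.bb:
    store i32 %merged, i32* %addr
    br label %cont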
- SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses; - for (auto *BB : {PTB, PFB}) { - if (!BB) - continue; - for (auto &I : *BB) - if (StoreInst *SI = dyn_cast<StoreInst>(&I)) - PStoreAddresses.insert(SI->getPointerOperand()); - } - for (auto *BB : {QTB, QFB}) { - if (!BB) - continue; - for (auto &I : *BB) - if (StoreInst *SI = dyn_cast<StoreInst>(&I)) - QStoreAddresses.insert(SI->getPointerOperand()); - } - - set_intersect(PStoreAddresses, QStoreAddresses); - // set_intersect mutates PStoreAddresses in place. Rename it here to make it - // clear what it contains. - auto &CommonAddresses = PStoreAddresses; - - bool Changed = false; - for (auto *Address : CommonAddresses) - Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); - return Changed; -} - -/// If we have a conditional branch as a predecessor of another block, -/// this function tries to simplify it. We know -/// that PBI and BI are both conditional branches, and BI is in one of the -/// successor blocks of PBI - PBI branches to BI. -static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { - assert(PBI->isConditional() && BI->isConditional()); - BasicBlock *BB = BI->getParent(); - - // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make - // this conditional branch redundant. - if (PBI->getCondition() == BI->getCondition() && - PBI->getSuccessor(0) != PBI->getSuccessor(1)) { - // Okay, the outcome of this conditional branch is statically - // knowable. If this block had a single pred, handle specially. - if (BB->getSinglePredecessor()) { - // Turn this into a branch on constant. - bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition( - ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); - return true; // Nuke the branch on constant. - } - - // Otherwise, if there are multiple predecessors, insert a PHI that merges - // in the constant and simplify the block result. Subsequent passes of - // simplifycfg will thread the block. - if (BlockIsSimpleEnoughToThreadThrough(BB)) { - pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create( - Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", &BB->front()); - // Okay, we're going to insert the PHI node. Since PBI is not the only - // predecessor, compute the PHI'd conditional value for all of the preds. - // Any predecessor where the condition is not computable we keep symbolic. - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && - PBI->isConditional() && PBI->getCondition() == BI->getCondition() && - PBI->getSuccessor(0) != PBI->getSuccessor(1)) { - bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming( - ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), - P); - } else { - NewPN->addIncoming(BI->getCondition(), P); - } - } - - BI->setCondition(NewPN); - return true; - } - } - - if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) - if (CE->canTrap()) - return false; - - // If both branches are conditional and both contain stores to the same - // address, remove the stores from the conditionals and create a conditional - // merged store at the end. 
- if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) - return true; - - // If this is a conditional branch in an empty block, and if any - // predecessors are a conditional branch to one of our destinations, - // fold the conditions into logical ops and one cond br. - - // Ignore dbg intrinsics. - if (&*BB->instructionsWithoutDebug().begin() != BI) - return false; - - int PBIOp, BIOp; - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { - PBIOp = 0; - BIOp = 0; - } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { - PBIOp = 0; - BIOp = 1; - } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { - PBIOp = 1; - BIOp = 0; - } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { - PBIOp = 1; - BIOp = 1; - } else { - return false; - } - - // Check to make sure that the other destination of this branch - // isn't BB itself. If so, this is an infinite loop that will - // keep getting unwound. - if (PBI->getSuccessor(PBIOp) == BB) - return false; - - // Do not perform this transformation if it would require - // insertion of a large number of select instructions. For targets - // without predication/cmovs, this is a big pessimization. - - // Also do not perform this transformation if any phi node in the common - // destination block can trap when reached by BB or PBB (PR17073). In that - // case, it would be unsafe to hoist the operation into a select instruction. - - BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - unsigned NumPhis = 0; - for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); - ++II, ++NumPhis) { - if (NumPhis > 2) // Disable this xform. - return false; - - PHINode *PN = cast<PHINode>(II); - Value *BIV = PN->getIncomingValueForBlock(BB); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) - if (CE->canTrap()) - return false; - - unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); - Value *PBIV = PN->getIncomingValue(PBBIdx); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) - if (CE->canTrap()) - return false; - } - - // Finally, if everything is ok, fold the branches to logical ops. - BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - - LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() - << "AND: " << *BI->getParent()); - - // If OtherDest *is* BB, then BB is a basic block with a single conditional - // branch in it, where one edge (OtherDest) goes back to itself but the other - // exits. We don't *know* that the program avoids the infinite loop - // (even though that seems likely). If we do this xform naively, we'll end up - // recursively unpeeling the loop. Since we know that (after the xform is - // done) that the block *is* infinite if reached, we just make it an obviously - // infinite loop with no cond branch. - if (OtherDest == BB) { - // Insert it at the end of the function, because it's either code, - // or it won't matter if it's hot. :) - BasicBlock *InfLoopBlock = - BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); - BranchInst::Create(InfLoopBlock, InfLoopBlock); - OtherDest = InfLoopBlock; - } - - LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); - - // BI may have other predecessors. Because of this, we leave - // it alone, but modify PBI. - - // Make sure we get to CommonDest on True&True directions. 
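Putting the pieces together on a hypothetical example with PBIOp = BIOp = 0 and branch weights 3:1 on PBI and 4:1 on BI:

  ; before
  pbb:
    br i1 %pc, label %common, label %bb      ; weights 3, 1
  bb:
    br i1 %bc, label %common, label %other   ; weights 4, 1

  ; after (PBI rewritten; %bb loses a predecessor)
  pbb:
    %brmerge = or i1 %pc, %bc
    br i1 %brmerge, label %common, label %other
    ; new weights per the formulas below: 3*(4+1) + 1*4 = 19 to %common,
    ; 1*1 = 1 to %other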
- Value *PBICond = PBI->getCondition(); - IRBuilder<NoFolder> Builder(PBI); - if (PBIOp) - PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not"); - - Value *BICond = BI->getCondition(); - if (BIOp) - BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); - - // Merge the conditions. - Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - - // Modify PBI to branch on the new condition to the new dests. - PBI->setCondition(Cond); - PBI->setSuccessor(0, CommonDest); - PBI->setSuccessor(1, OtherDest); - - // Update branch weight for PBI. - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - uint64_t PredCommon, PredOther, SuccCommon, SuccOther; - bool HasWeights = - extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight); - if (HasWeights) { - PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; - PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; - SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; - SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; - // The weight to CommonDest should be PredCommon * SuccTotal + - // PredOther * SuccCommon. - // The weight to OtherDest should be PredOther * SuccOther. - uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + - PredOther * SuccCommon, - PredOther * SuccOther}; - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - setBranchWeights(PBI, NewWeights[0], NewWeights[1]); - } - - // OtherDest may have phi nodes. If so, add an entry from PBI's - // block that are identical to the entries for BI's block. - AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - - // We know that the CommonDest already had an edge from PBI to - // it. If it has PHIs though, the PHIs may have different - // entries for BB and PBI's BB. If so, insert a select to make - // them agree. - for (PHINode &PN : CommonDest->phis()) { - Value *BIV = PN.getIncomingValueForBlock(BB); - unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent()); - Value *PBIV = PN.getIncomingValue(PBBIdx); - if (BIV != PBIV) { - // Insert a select in PBI to pick the right value. - SelectInst *NV = cast<SelectInst>( - Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); - PN.setIncomingValue(PBBIdx, NV); - // Although the select has the same condition as PBI, the original branch - // weights for PBI do not apply to the new select because the select's - // 'logical' edges are incoming edges of the phi that is eliminated, not - // the outgoing edges of PBI. - if (HasWeights) { - uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; - uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; - uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; - uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; - // The weight to PredCommonDest should be PredCommon * SuccTotal. - // The weight to PredOtherDest should be PredOther * SuccCommon. - uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), - PredOther * SuccCommon}; - - FitWeights(NewWeights); - - setBranchWeights(NV, NewWeights[0], NewWeights[1]); - } - } - } - - LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent()); - LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); - - // This basic block is probably dead. We know it has at least - // one fewer predecessor. - return true; -} - -// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is -// true or to FalseBB if Cond is false. 
-// Takes care of updating the successors and removing the old terminator. -// Also makes sure not to introduce new successors by assuming that edges to -// non-successor TrueBBs and FalseBBs aren't reachable. -static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB, - uint32_t TrueWeight, - uint32_t FalseWeight) { - // Remove any superfluous successor edges from the CFG. - // First, figure out which successors to preserve. - // If TrueBB and FalseBB are equal, only try to preserve one copy of that - // successor. - BasicBlock *KeepEdge1 = TrueBB; - BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - - // Then remove the rest. - for (BasicBlock *Succ : successors(OldTerm)) { - // Make sure only to keep exactly one copy of each edge. - if (Succ == KeepEdge1) - KeepEdge1 = nullptr; - else if (Succ == KeepEdge2) - KeepEdge2 = nullptr; - else - Succ->removePredecessor(OldTerm->getParent(), - /*KeepOneInputPHIs=*/true); - } - - IRBuilder<> Builder(OldTerm); - Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); - - // Insert an appropriate new terminator. - if (!KeepEdge1 && !KeepEdge2) { - if (TrueBB == FalseBB) - // We were only looking for one successor, and it was present. - // Create an unconditional branch to it. - Builder.CreateBr(TrueBB); - else { - // We found both of the successors we were looking for. - // Create a conditional branch sharing the condition of the select. - BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); - if (TrueWeight != FalseWeight) - setBranchWeights(NewBI, TrueWeight, FalseWeight); - } - } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { - // Neither of the selected blocks were successors, so this - // terminator must be unreachable. - new UnreachableInst(OldTerm->getContext(), OldTerm); - } else { - // One of the selected values was a successor, but the other wasn't. - // Insert an unconditional branch to the one that was found; - // the edge to the one that wasn't must be unreachable. - if (!KeepEdge1) - // Only TrueBB was found. - Builder.CreateBr(TrueBB); - else - // Only FalseBB was found. - Builder.CreateBr(FalseBB); - } - - EraseTerminatorAndDCECond(OldTerm); - return true; -} - -// Replaces -// (switch (select cond, X, Y)) on constant X, Y -// with a branch - conditional if X and Y lead to distinct BBs, -// unconditional otherwise. -static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { - // Check for constant integer values in the select. - ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); - ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); - if (!TrueVal || !FalseVal) - return false; - - // Find the relevant condition and destinations. - Value *Condition = Select->getCondition(); - BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor(); - BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor(); - - // Get weight for TrueBB and FalseBB. - uint32_t TrueWeight = 0, FalseWeight = 0; - SmallVector<uint64_t, 8> Weights; - bool HasWeights = HasBranchWeights(SI); - if (HasWeights) { - GetBranchWeights(SI, Weights); - if (Weights.size() == 1 + SI->getNumCases()) { - TrueWeight = - (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()]; - FalseWeight = - (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()]; - } - } - - // Perform the actual simplification. 
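For example (hypothetical values), this turns

  %sel = select i1 %cond, i32 1, i32 2
  switch i32 %sel, label %default [
    i32 1, label %bb1
    i32 2, label %bb2
  ]

into "br i1 %cond, label %bb1, label %bb2", or into an unconditional branch when both constants lead to the same block.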
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight, - FalseWeight); -} - -// Replaces -// (indirectbr (select cond, blockaddress(@fn, BlockA), -// blockaddress(@fn, BlockB))) -// with -// (br cond, BlockA, BlockB). -static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { - // Check that both operands of the select are block addresses. - BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue()); - BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue()); - if (!TBA || !FBA) - return false; - - // Extract the actual blocks. - BasicBlock *TrueBB = TBA->getBasicBlock(); - BasicBlock *FalseBB = FBA->getBasicBlock(); - - // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, - 0); -} - -/// This is called when we find an icmp instruction -/// (a seteq/setne with a constant) as the only instruction in a -/// block that ends with an uncond branch. We are looking for a very specific -/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In -/// this case, we merge the first two "or's of icmp" into a switch, but then the -/// default value goes to an uncond block with a seteq in it, we get something -/// like: -/// -/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] -/// DEFAULT: -/// %tmp = icmp eq i8 %A, 92 -/// br label %end -/// end: -/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// -/// We prefer to split the edge to 'end' so that there is a true/false entry to -/// the PHI, merging the third icmp into the switch. -bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( - ICmpInst *ICI, IRBuilder<> &Builder) { - BasicBlock *BB = ICI->getParent(); - - // If the block has any PHIs in it or the icmp has multiple uses, it is too - // complex. - if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) - return false; - - Value *V = ICI->getOperand(0); - ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - - // The pattern we're looking for is where our only predecessor is a switch on - // 'V' and this block is the default case for the switch. In this case we can - // fold the compared value into the switch to simplify things. - BasicBlock *Pred = BB->getSinglePredecessor(); - if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) - return false; - - SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); - if (SI->getCondition() != V) - return false; - - // If BB is reachable on a non-default case, then we simply know the value of - // V in this block. Substitute it and constant fold the icmp instruction - // away. - if (SI->getDefaultDest() != BB) { - ConstantInt *VVal = SI->findCaseDest(BB); - assert(VVal && "Should have a unique destination value"); - ICI->setOperand(0, VVal); - - if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { - ICI->replaceAllUsesWith(V); - ICI->eraseFromParent(); - } - // BB is now empty, so it is likely to simplify away. - return requestResimplify(); - } - - // Ok, the block is reachable from the default dest. If the constant we're - // comparing exists in one of the other edges, then we can constant fold ICI - // and zap it. - if (SI->findCaseValue(Cst) != SI->case_default()) { - Value *V; - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - V = ConstantInt::getFalse(BB->getContext()); - else - V = ConstantInt::getTrue(BB->getContext()); - - ICI->replaceAllUsesWith(V); - ICI->eraseFromParent(); - // BB is now empty, so it is likely to simplify away. 
- return requestResimplify(); - } - - // The use of the icmp has to be in the 'end' block, by the only PHI node in - // the block. - BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); - PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); - if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || - isa<PHINode>(++BasicBlock::iterator(PHIUse))) - return false; - - // If the icmp is a SETEQ, then the default dest gets false, the new edge gets - // true in the PHI. - Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); - Constant *NewCst = ConstantInt::getFalse(BB->getContext()); - - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - std::swap(DefaultCst, NewCst); - - // Replace ICI (which is used by the PHI for the default value) with true or - // false depending on if it is EQ or NE. - ICI->replaceAllUsesWith(DefaultCst); - ICI->eraseFromParent(); - - // Okay, the switch goes to this block on a default value. Add an edge from - // the switch to the merge point on the compared value. - BasicBlock *NewBB = - BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); - { - SwitchInstProfUpdateWrapper SIW(*SI); - auto W0 = SIW.getSuccessorWeight(0); - SwitchInstProfUpdateWrapper::CaseWeightOpt NewW; - if (W0) { - NewW = ((uint64_t(*W0) + 1) >> 1); - SIW.setSuccessorWeight(0, *NewW); - } - SIW.addCase(Cst, NewBB, NewW); - } - - // NewBB branches to the phi block, add the uncond branch and the phi entry. - Builder.SetInsertPoint(NewBB); - Builder.SetCurrentDebugLocation(SI->getDebugLoc()); - Builder.CreateBr(SuccBlock); - PHIUse->addIncoming(NewCst, NewBB); - return true; -} - -/// The specified branch is a conditional branch. -/// Check to see if it is branching on an or/and chain of icmp instructions, and -/// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, - const DataLayout &DL) { - Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (!Cond) - return false; - - // Change br (X == 0 | X == 1), T, F into a switch instruction. - // If this is a bunch of seteq's or'd together, or if it's a bunch of - // 'setne's and'ed together, collect them. - - // Try to gather values from a chain of and/or to be turned into a switch - ConstantComparesGatherer ConstantCompare(Cond, DL); - // Unpack the result - SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; - Value *CompVal = ConstantCompare.CompValue; - unsigned UsedICmps = ConstantCompare.UsedICmps; - Value *ExtraCase = ConstantCompare.Extra; - - // If we didn't have a multiply compared value, fail. - if (!CompVal) - return false; - - // Avoid turning single icmps into a switch. - if (UsedICmps <= 1) - return false; - - bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or); - - // There might be duplicate constants in the list, which the switch - // instruction can't handle, remove them now. - array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); - Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - - // If Extra was used, we require at least two switch values to do the - // transformation. A switch with one value is just a conditional branch. - if (ExtraCase && Values.size() < 2) - return false; - - // TODO: Preserve branch weight metadata, similarly to how - // FoldValueComparisonIntoPredecessors preserves it. - - // Figure out which block is which destination. 
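The shape of this rewrite, sketched with invented values; the or'd equality chain becomes switch cases and the false path becomes the default:

  ; before
  %c1 = icmp eq i32 %a, 1
  %c2 = icmp eq i32 %a, 2
  %or = or i1 %c1, %c2
  br i1 %or, label %eq, label %ne

  ; after
  switch i32 %a, label %ne [
    i32 1, label %eq
    i32 2, label %eq
  ]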
- BasicBlock *DefaultBB = BI->getSuccessor(1); - BasicBlock *EdgeBB = BI->getSuccessor(0); - if (!TrueWhenEqual) - std::swap(DefaultBB, EdgeBB); - - BasicBlock *BB = BI->getParent(); - - LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() - << " cases into SWITCH. BB is:\n" - << *BB); - - // If there are any extra values that couldn't be folded into the switch - // then we evaluate them with an explicit branch first. Split the block - // right before the condbr to handle it. - if (ExtraCase) { - BasicBlock *NewBB = - BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); - // Remove the uncond branch added to the old block. - Instruction *OldTI = BB->getTerminator(); - Builder.SetInsertPoint(OldTI); - - if (TrueWhenEqual) - Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); - else - Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - - OldTI->eraseFromParent(); - - // If there are PHI nodes in EdgeBB, then we need to add a new entry to them - // for the edge we just added. - AddPredecessorToBlock(EdgeBB, BB, NewBB); - - LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase - << "\nEXTRABB = " << *BB); - BB = NewBB; - } - - Builder.SetInsertPoint(BI); - // Convert pointer to int before we switch. - if (CompVal->getType()->isPointerTy()) { - CompVal = Builder.CreatePtrToInt( - CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); - } - - // Create the new switch instruction now. - SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); - - // Add all of the 'cases' to the switch instruction. - for (unsigned i = 0, e = Values.size(); i != e; ++i) - New->addCase(Values[i], EdgeBB); - - // We added edges from PI to the EdgeBB. As such, if there were any - // PHI nodes in EdgeBB, they need entries to be added corresponding to - // the number of edges added. - for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) { - PHINode *PN = cast<PHINode>(BBI); - Value *InVal = PN->getIncomingValueForBlock(BB); - for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) - PN->addIncoming(InVal, BB); - } - - // Erase the old branch instruction. - EraseTerminatorAndDCECond(BI); - - LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); - return true; -} - -bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { - if (isa<PHINode>(RI->getValue())) - return SimplifyCommonResume(RI); - else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) && - RI->getValue() == RI->getParent()->getFirstNonPHI()) - // The resume must unwind the exception that caused control to branch here. - return SimplifySingleResume(RI); - - return false; -} - -// Simplify resume that is shared by several landing pads (phi of landing pad). -bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { - BasicBlock *BB = RI->getParent(); - - // Check that there are no other instructions except for debug intrinsics - // between the phi of landing pads (RI->getValue()) and resume instruction. - BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(), - E = RI->getIterator(); - while (++I != E) - if (!isa<DbgInfoIntrinsic>(I)) - return false; - - SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; - auto *PhiLPInst = cast<PHINode>(RI->getValue()); - - // Check incoming blocks to see if any of them are trivial. 
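A trivial incoming block, in a sketch with invented labels, is one whose only real instructions are the landingpad and the branch to the shared resume block:

  lpad.1:                                   ; trivial unwind block
    %lp1 = landingpad { i8*, i32 } cleanup
    br label %resume.common
  resume.common:
    %exn = phi { i8*, i32 } [ %lp1, %lpad.1 ], [ %lp2, %lpad.2 ]
    resume { i8*, i32 } %exn

Every invoke that unwinds to %lpad.1 is then rewritten into a plain call, and %lpad.1 is cut off from %resume.common.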
-  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
-       Idx++) {
-    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
-    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
-
-    // If the block has other successors, we cannot delete it because
-    // it has other dependents.
-    if (IncomingBB->getUniqueSuccessor() != BB)
-      continue;
-
-    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
-    // Not the landing pad that caused the control to branch here.
-    if (IncomingValue != LandingPad)
-      continue;
-
-    bool isTrivial = true;
-
-    I = IncomingBB->getFirstNonPHI()->getIterator();
-    E = IncomingBB->getTerminator()->getIterator();
-    while (++I != E)
-      if (!isa<DbgInfoIntrinsic>(I)) {
-        isTrivial = false;
-        break;
-      }
-
-    if (isTrivial)
-      TrivialUnwindBlocks.insert(IncomingBB);
-  }
-
-  // If no trivial unwind blocks, don't do any simplifications.
-  if (TrivialUnwindBlocks.empty())
-    return false;
-
-  // Turn all invokes that unwind here into calls.
-  for (auto *TrivialBB : TrivialUnwindBlocks) {
-    // Blocks that will be simplified should be removed from the phi node.
-    // Note there could be multiple edges to the resume block, and we need
-    // to remove them all.
-    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
-      BB->removePredecessor(TrivialBB, true);
-
-    for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
-         PI != PE;) {
-      BasicBlock *Pred = *PI++;
-      removeUnwindEdge(Pred);
-    }
-
-    // In each SimplifyCFG run, only the currently processed block can be
-    // erased; erasing anything else would break the pass's iteration. So
-    // instead of erasing TrivialBB, we only remove the branch to the common
-    // resume block so that we can later erase the resume block since it has
-    // no predecessors.
-    TrivialBB->getTerminator()->eraseFromParent();
-    new UnreachableInst(RI->getContext(), TrivialBB);
-  }
-
-  // Delete the resume block if all its predecessors have been removed.
-  if (pred_empty(BB))
-    BB->eraseFromParent();
-
-  return !TrivialUnwindBlocks.empty();
-}
-
-// Simplify resume that is only used by a single (non-phi) landing pad.
-bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
-  BasicBlock *BB = RI->getParent();
-  LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
-  assert(RI->getValue() == LPInst &&
-         "Resume must unwind the exception that caused control to branch here");
-
-  // Check that there are no other instructions except for debug intrinsics.
-  BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
-  while (++I != E)
-    if (!isa<DbgInfoIntrinsic>(I))
-      return false;
-
-  // Turn all invokes that unwind here into calls and delete the basic block.
-  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
-    BasicBlock *Pred = *PI++;
-    removeUnwindEdge(Pred);
-  }
-
-  // The landingpad is now unreachable. Zap it.
-  if (LoopHeaders)
-    LoopHeaders->erase(BB);
-  BB->eraseFromParent();
-  return true;
-}
-
-static bool removeEmptyCleanup(CleanupReturnInst *RI) {
-  // If this is a trivial cleanup pad that executes no instructions, it can be
-  // eliminated. If the cleanup pad continues to the caller, any predecessor
-  // that is an EH pad will be updated to continue to the caller and any
-  // predecessor that terminates with an invoke instruction will have its
-  // invoke instruction converted to a call instruction.
If the cleanup pad being - // simplified does not continue to the caller, each predecessor will be - // updated to continue to the unwind destination of the cleanup pad being - // simplified. - BasicBlock *BB = RI->getParent(); - CleanupPadInst *CPInst = RI->getCleanupPad(); - if (CPInst->getParent() != BB) - // This isn't an empty cleanup. - return false; - - // We cannot kill the pad if it has multiple uses. This typically arises - // from unreachable basic blocks. - if (!CPInst->hasOneUse()) - return false; - - // Check that there are no other instructions except for benign intrinsics. - BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); - while (++I != E) { - auto *II = dyn_cast<IntrinsicInst>(I); - if (!II) - return false; - - Intrinsic::ID IntrinsicID = II->getIntrinsicID(); - switch (IntrinsicID) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::lifetime_end: - break; - default: - return false; - } - } - - // If the cleanup return we are simplifying unwinds to the caller, this will - // set UnwindDest to nullptr. - BasicBlock *UnwindDest = RI->getUnwindDest(); - Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr; - - // We're about to remove BB from the control flow. Before we do, sink any - // PHINodes into the unwind destination. Doing this before changing the - // control flow avoids some potentially slow checks, since we can currently - // be certain that UnwindDest and BB have no common predecessors (since they - // are both EH pads). - if (UnwindDest) { - // First, go through the PHI nodes in UnwindDest and update any nodes that - // reference the block we are removing - for (BasicBlock::iterator I = UnwindDest->begin(), - IE = DestEHPad->getIterator(); - I != IE; ++I) { - PHINode *DestPN = cast<PHINode>(I); - - int Idx = DestPN->getBasicBlockIndex(BB); - // Since BB unwinds to UnwindDest, it has to be in the PHI node. - assert(Idx != -1); - // This PHI node has an incoming value that corresponds to a control - // path through the cleanup pad we are removing. If the incoming - // value is in the cleanup pad, it must be a PHINode (because we - // verified above that the block is otherwise empty). Otherwise, the - // value is either a constant or a value that dominates the cleanup - // pad being removed. - // - // Because BB and UnwindDest are both EH pads, all of their - // predecessors must unwind to these blocks, and since no instruction - // can have multiple unwind destinations, there will be no overlap in - // incoming blocks between SrcPN and DestPN. - Value *SrcVal = DestPN->getIncomingValue(Idx); - PHINode *SrcPN = dyn_cast<PHINode>(SrcVal); - - // Remove the entry for the block we are deleting. - DestPN->removeIncomingValue(Idx, false); - - if (SrcPN && SrcPN->getParent() == BB) { - // If the incoming value was a PHI node in the cleanup pad we are - // removing, we need to merge that PHI node's incoming values into - // DestPN. - for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); - SrcIdx != SrcE; ++SrcIdx) { - DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), - SrcPN->getIncomingBlock(SrcIdx)); - } - } else { - // Otherwise, the incoming value came from above BB and - // so we can just reuse it. We must associate all of BB's - // predecessors with this value. - for (auto *pred : predecessors(BB)) { - DestPN->addIncoming(SrcVal, pred); - } - } - } - - // Sink any remaining PHI nodes directly into UnwindDest. 
-    Instruction *InsertPt = DestEHPad;
-    for (BasicBlock::iterator I = BB->begin(),
-                              IE = BB->getFirstNonPHI()->getIterator();
-         I != IE;) {
-      // The iterator must be incremented here because the instructions are
-      // being moved to another block.
-      PHINode *PN = cast<PHINode>(I++);
-      if (PN->use_empty())
-        // If the PHI node has no uses, just leave it. It will be erased
-        // when we erase BB below.
-        continue;
-
-      // Otherwise, sink this PHI node into UnwindDest.
-      // Any predecessors to UnwindDest which are not already represented
-      // must be back edges which inherit the value from the path through
-      // BB. In this case, the PHI value must reference itself.
-      for (auto *pred : predecessors(UnwindDest))
-        if (pred != BB)
-          PN->addIncoming(PN, pred);
-      PN->moveBefore(InsertPt);
-    }
-  }
-
-  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
-    // The iterator must be updated here because we are removing this pred.
-    BasicBlock *PredBB = *PI++;
-    if (UnwindDest == nullptr) {
-      removeUnwindEdge(PredBB);
-    } else {
-      Instruction *TI = PredBB->getTerminator();
-      TI->replaceUsesOfWith(BB, UnwindDest);
-    }
-  }
-
-  // The cleanup pad is now unreachable. Zap it.
-  BB->eraseFromParent();
-  return true;
-}
-
-// Try to merge two cleanuppads together.
-static bool mergeCleanupPad(CleanupReturnInst *RI) {
-  // Skip any cleanuprets which unwind to caller; there is nothing to merge
-  // with.
-  BasicBlock *UnwindDest = RI->getUnwindDest();
-  if (!UnwindDest)
-    return false;
-
-  // If this cleanupret isn't the only predecessor of the cleanuppad, it
-  // wouldn't be safe to merge without code duplication.
-  if (UnwindDest->getSinglePredecessor() != RI->getParent())
-    return false;
-
-  // Verify that our cleanuppad's unwind destination is another cleanuppad.
-  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
-  if (!SuccessorCleanupPad)
-    return false;
-
-  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
-  // Replace any uses of the successor cleanuppad with the predecessor pad.
-  // The only cleanuppad uses should be this cleanupret, its cleanupret, and
-  // funclet bundle operands.
-  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
-  // Remove the old cleanuppad.
-  SuccessorCleanupPad->eraseFromParent();
-  // Now, we simply replace the cleanupret with a branch to the unwind
-  // destination.
-  BranchInst::Create(UnwindDest, RI->getParent());
-  RI->eraseFromParent();
-
-  return true;
-}
-
-bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) {
-  // It is possible to transiently have an undef cleanuppad operand because we
-  // have deleted some, but not all, dead blocks.
-  // Eventually, this block will be deleted.
-  if (isa<UndefValue>(RI->getOperand(0)))
-    return false;
-
-  if (mergeCleanupPad(RI))
-    return true;
-
-  if (removeEmptyCleanup(RI))
-    return true;
-
-  return false;
-}
-
-bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
-  BasicBlock *BB = RI->getParent();
-  if (!BB->getFirstNonPHIOrDbg()->isTerminator())
-    return false;
-
-  // Find predecessors that end with branches.
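-  // Unconditional-branch predecessors can simply have the return duplicated
-  // into them; a conditional-branch predecessor whose other successor also
-  // returns may instead be rewritten into a select feeding a single return
-  // (via SimplifyCondBranchToTwoReturns).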
- SmallVector<BasicBlock *, 8> UncondBranchPreds; - SmallVector<BranchInst *, 8> CondBranchPreds; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - Instruction *PTI = P->getTerminator(); - if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { - if (BI->isUnconditional()) - UncondBranchPreds.push_back(P); - else - CondBranchPreds.push_back(BI); - } - } - - // If we found some, do the transformation! - if (!UncondBranchPreds.empty() && DupRet) { - while (!UncondBranchPreds.empty()) { - BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - LLVM_DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); - (void)FoldReturnIntoUncondBranch(RI, BB, Pred); - } - - // If we eliminated all predecessors of the block, delete the block now. - if (pred_empty(BB)) { - // We know there are no successors, so just nuke the block. - if (LoopHeaders) - LoopHeaders->erase(BB); - BB->eraseFromParent(); - } - - return true; - } - - // Check out all of the conditional branches going to this return - // instruction. If any of them just select between returns, change the - // branch itself into a select/return pair. - while (!CondBranchPreds.empty()) { - BranchInst *BI = CondBranchPreds.pop_back_val(); - - // Check to see if the non-BB successor is also a return block. - if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && - isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && - SimplifyCondBranchToTwoReturns(BI, Builder)) - return true; - } - return false; -} - -bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { - BasicBlock *BB = UI->getParent(); - - bool Changed = false; - - // If there are any instructions immediately before the unreachable that can - // be removed, do so. - while (UI->getIterator() != BB->begin()) { - BasicBlock::iterator BBI = UI->getIterator(); - --BBI; - // Do not delete instructions that can have side effects which might cause - // the unreachable to not be reachable; specifically, calls and volatile - // operations may have this effect. - if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) - break; - - if (BBI->mayHaveSideEffects()) { - if (auto *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->isVolatile()) - break; - } else if (auto *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->isVolatile()) - break; - } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { - if (RMWI->isVolatile()) - break; - } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { - if (CXI->isVolatile()) - break; - } else if (isa<CatchPadInst>(BBI)) { - // A catchpad may invoke exception object constructors and such, which - // in some languages can be arbitrary code, so be conservative by - // default. - // For CoreCLR, it just involves a type test, so can be removed. - if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != - EHPersonality::CoreCLR) - break; - } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && - !isa<LandingPadInst>(BBI)) { - break; - } - // Note that deleting LandingPad's here is in fact okay, although it - // involves a bit of subtle reasoning. If this inst is a LandingPad, - // all the predecessors of this block will be the unwind edges of Invokes, - // and we can therefore guarantee this block will be erased. 
-    }
-
-    // Delete this instruction (any uses are guaranteed to be dead).
-    if (!BBI->use_empty())
-      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
-    BBI->eraseFromParent();
-    Changed = true;
-  }
-
-  // If the unreachable instruction is the first in the block, look at all of
-  // the predecessors of this instruction, and simplify them.
-  if (&BB->front() != UI)
-    return Changed;
-
-  SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
-  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
-    Instruction *TI = Preds[i]->getTerminator();
-    IRBuilder<> Builder(TI);
-    if (auto *BI = dyn_cast<BranchInst>(TI)) {
-      if (BI->isUnconditional()) {
-        if (BI->getSuccessor(0) == BB) {
-          new UnreachableInst(TI->getContext(), TI);
-          TI->eraseFromParent();
-          Changed = true;
-        }
-      } else {
-        Value *Cond = BI->getCondition();
-        if (BI->getSuccessor(0) == BB) {
-          Builder.CreateAssumption(Builder.CreateNot(Cond));
-          Builder.CreateBr(BI->getSuccessor(1));
-          EraseTerminatorAndDCECond(BI);
-          Changed = true;
-        } else if (BI->getSuccessor(1) == BB) {
-          Builder.CreateAssumption(Cond);
-          Builder.CreateBr(BI->getSuccessor(0));
-          EraseTerminatorAndDCECond(BI);
-          Changed = true;
-        }
-      }
-    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
-      SwitchInstProfUpdateWrapper SU(*SI);
-      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
-        if (i->getCaseSuccessor() != BB) {
-          ++i;
-          continue;
-        }
-        BB->removePredecessor(SU->getParent());
-        i = SU.removeCase(i);
-        e = SU->case_end();
-        Changed = true;
-      }
-    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
-      if (II->getUnwindDest() == BB) {
-        removeUnwindEdge(TI->getParent());
-        Changed = true;
-      }
-    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
-      if (CSI->getUnwindDest() == BB) {
-        removeUnwindEdge(TI->getParent());
-        Changed = true;
-        continue;
-      }
-
-      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
-                                             E = CSI->handler_end();
-           I != E; ++I) {
-        if (*I == BB) {
-          CSI->removeHandler(I);
-          --I;
-          --E;
-          Changed = true;
-        }
-      }
-      if (CSI->getNumHandlers() == 0) {
-        BasicBlock *CatchSwitchBB = CSI->getParent();
-        if (CSI->hasUnwindDest()) {
-          // Redirect preds to the unwind dest.
-          CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
-        } else {
-          // Rewrite all preds to unwind to caller (or from invoke to call).
-          SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
-          for (BasicBlock *EHPred : EHPreds)
-            removeUnwindEdge(EHPred);
-        }
-        // The catchswitch is no longer reachable.
-        new UnreachableInst(CSI->getContext(), CSI);
-        CSI->eraseFromParent();
-        Changed = true;
-      }
-    } else if (isa<CleanupReturnInst>(TI)) {
-      new UnreachableInst(TI->getContext(), TI);
-      TI->eraseFromParent();
-      Changed = true;
-    }
-  }
-
-  // If this block is now dead, remove it.
-  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
-    // We know there are no successors, so just nuke the block.
-    if (LoopHeaders)
-      LoopHeaders->erase(BB);
-    BB->eraseFromParent();
-    return true;
-  }
-
-  return Changed;
-}
-
-static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
-  assert(Cases.size() >= 1);
-
-  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
-  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
-    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
-      return false;
-  }
-  return true;
-}
-
-/// Turn a switch with two reachable destinations into an integer range
-/// comparison and branch.
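-/// For example (an illustrative sketch; the value names follow the names the
-/// builder uses below, but the blocks and constants are invented):
-/// \code
-///   switch i32 %x, label %other [ i32 3, label %contig
-///                                 i32 4, label %contig
-///                                 i32 5, label %contig ]
-/// \endcode
-/// becomes
-/// \code
-///   %x.off = add i32 %x, -3
-///   %switch = icmp ult i32 %x.off, 3
-///   br i1 %switch, label %contig, label %other
-/// \endcode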
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
-  assert(SI->getNumCases() > 1 && "Degenerate switch?");
-
-  bool HasDefault =
-      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-
-  // Partition the cases into two sets with different destinations.
-  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
-  BasicBlock *DestB = nullptr;
-  SmallVector<ConstantInt *, 16> CasesA;
-  SmallVector<ConstantInt *, 16> CasesB;
-
-  for (auto Case : SI->cases()) {
-    BasicBlock *Dest = Case.getCaseSuccessor();
-    if (!DestA)
-      DestA = Dest;
-    if (Dest == DestA) {
-      CasesA.push_back(Case.getCaseValue());
-      continue;
-    }
-    if (!DestB)
-      DestB = Dest;
-    if (Dest == DestB) {
-      CasesB.push_back(Case.getCaseValue());
-      continue;
-    }
-    return false; // More than two destinations.
-  }
-
-  assert(DestA && DestB &&
-         "Single-destination switch should have been folded.");
-  assert(DestA != DestB);
-  assert(DestB != SI->getDefaultDest());
-  assert(!CasesB.empty() && "There must be non-default cases.");
-  assert(!CasesA.empty() || HasDefault);
-
-  // Figure out if one of the sets of cases forms a contiguous range.
-  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
-  BasicBlock *ContiguousDest = nullptr;
-  BasicBlock *OtherDest = nullptr;
-  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
-    ContiguousCases = &CasesA;
-    ContiguousDest = DestA;
-    OtherDest = DestB;
-  } else if (CasesAreContiguous(CasesB)) {
-    ContiguousCases = &CasesB;
-    ContiguousDest = DestB;
-    OtherDest = DestA;
-  } else
-    return false;
-
-  // Start building the compare and branch.
-
-  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
-  Constant *NumCases =
-      ConstantInt::get(Offset->getType(), ContiguousCases->size());
-
-  Value *Sub = SI->getCondition();
-  if (!Offset->isNullValue())
-    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
-
-  Value *Cmp;
-  // If NumCases overflowed, then all possible values jump to the successor.
-  if (NumCases->isNullValue() && !ContiguousCases->empty())
-    Cmp = ConstantInt::getTrue(SI->getContext());
-  else
-    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
-  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
-
-  // Update the weight of the newly-created conditional branch.
-  if (HasBranchWeights(SI)) {
-    SmallVector<uint64_t, 8> Weights;
-    GetBranchWeights(SI, Weights);
-    if (Weights.size() == 1 + SI->getNumCases()) {
-      uint64_t TrueWeight = 0;
-      uint64_t FalseWeight = 0;
-      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
-        if (SI->getSuccessor(I) == ContiguousDest)
-          TrueWeight += Weights[I];
-        else
-          FalseWeight += Weights[I];
-      }
-      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
-        TrueWeight /= 2;
-        FalseWeight /= 2;
-      }
-      setBranchWeights(NewBI, TrueWeight, FalseWeight);
-    }
-  }
-
-  // Prune obsolete incoming values off the successors' PHI nodes.
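-  // Each case edge (plus the default edge, when it targets the same block)
-  // previously contributed one PHI entry per successor; the new conditional
-  // branch supplies exactly one edge, so all but one entry must be dropped.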
- for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { - unsigned PreviousEdges = ContiguousCases->size(); - if (ContiguousDest == SI->getDefaultDest()) - ++PreviousEdges; - for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) - cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); - } - for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { - unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); - if (OtherDest == SI->getDefaultDest()) - ++PreviousEdges; - for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) - cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); - } - - // Drop the switch. - SI->eraseFromParent(); - - return true; -} - -/// Compute masked bits for the condition of a switch -/// and use it to remove dead cases. -static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, - const DataLayout &DL) { - Value *Cond = SI->getCondition(); - unsigned Bits = Cond->getType()->getIntegerBitWidth(); - KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); - - // We can also eliminate cases by determining that their values are outside of - // the limited range of the condition based on how many significant (non-sign) - // bits are in the condition value. - unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1; - unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits; - - // Gather dead cases. - SmallVector<ConstantInt *, 8> DeadCases; - for (auto &Case : SI->cases()) { - const APInt &CaseVal = Case.getCaseValue()->getValue(); - if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || - (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { - DeadCases.push_back(Case.getCaseValue()); - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal - << " is dead.\n"); - } - } - - // If we can prove that the cases must cover all possible values, the - // default destination becomes dead and we can remove it. If we know some - // of the bits in the value, we can use that to more precisely compute the - // number of possible unique case values. - bool HasDefault = - !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); - const unsigned NumUnknownBits = - Bits - (Known.Zero | Known.One).countPopulation(); - assert(NumUnknownBits <= Bits); - if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && - SI->getNumCases() == (1ULL << NumUnknownBits)) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefault = - SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); - SI->setDefaultDest(&*NewDefault); - SplitBlock(&*NewDefault, &NewDefault->front()); - auto *OldTI = NewDefault->getTerminator(); - new UnreachableInst(SI->getContext(), OldTI); - EraseTerminatorAndDCECond(OldTI); - return true; - } - - if (DeadCases.empty()) - return false; - - SwitchInstProfUpdateWrapper SIW(*SI); - for (ConstantInt *DeadCase : DeadCases) { - SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase); - assert(CaseI != SI->case_default() && - "Case was not found. Probably mistake in DeadCases forming."); - // Prune unused values from PHI nodes. - CaseI->getCaseSuccessor()->removePredecessor(SI->getParent()); - SIW.removeCase(CaseI); - } - - return true; -} - -/// If BB would be eligible for simplification by -/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated -/// by an unconditional branch), look at the phi node for BB in the successor -/// block and see if the incoming value is equal to CaseValue. 
If so, return -/// the phi node, and set PhiIndex to BB's index in the phi node. -static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, - BasicBlock *BB, int *PhiIndex) { - if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) - return nullptr; // BB must be empty to be a candidate for simplification. - if (!BB->getSinglePredecessor()) - return nullptr; // BB must be dominated by the switch. - - BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); - if (!Branch || !Branch->isUnconditional()) - return nullptr; // Terminator must be unconditional branch. - - BasicBlock *Succ = Branch->getSuccessor(0); - - for (PHINode &PHI : Succ->phis()) { - int Idx = PHI.getBasicBlockIndex(BB); - assert(Idx >= 0 && "PHI has no entry for predecessor?"); - - Value *InValue = PHI.getIncomingValue(Idx); - if (InValue != CaseValue) - continue; - - *PhiIndex = Idx; - return &PHI; - } - - return nullptr; -} - -/// Try to forward the condition of a switch instruction to a phi node -/// dominated by the switch, if that would mean that some of the destination -/// blocks of the switch can be folded away. Return true if a change is made. -static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { - using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>; - - ForwardingNodesMap ForwardingNodes; - BasicBlock *SwitchBlock = SI->getParent(); - bool Changed = false; - for (auto &Case : SI->cases()) { - ConstantInt *CaseValue = Case.getCaseValue(); - BasicBlock *CaseDest = Case.getCaseSuccessor(); - - // Replace phi operands in successor blocks that are using the constant case - // value rather than the switch condition variable: - // switchbb: - // switch i32 %x, label %default [ - // i32 17, label %succ - // ... - // succ: - // %r = phi i32 ... [ 17, %switchbb ] ... - // --> - // %r = phi i32 ... [ %x, %switchbb ] ... - - for (PHINode &Phi : CaseDest->phis()) { - // This only works if there is exactly 1 incoming edge from the switch to - // a phi. If there is >1, that means multiple cases of the switch map to 1 - // value in the phi, and that phi value is not the switch condition. Thus, - // this transform would not make sense (the phi would be invalid because - // a phi can't have different incoming values from the same block). - int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock); - if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue && - count(Phi.blocks(), SwitchBlock) == 1) { - Phi.setIncomingValue(SwitchBBIdx, SI->getCondition()); - Changed = true; - } - } - - // Collect phi nodes that are indirectly using this switch's case constants. - int PhiIdx; - if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) - ForwardingNodes[Phi].push_back(PhiIdx); - } - - for (auto &ForwardingNode : ForwardingNodes) { - PHINode *Phi = ForwardingNode.first; - SmallVectorImpl<int> &Indexes = ForwardingNode.second; - if (Indexes.size() < 2) - continue; - - for (int Index : Indexes) - Phi->setIncomingValue(Index, SI->getCondition()); - Changed = true; - } - - return Changed; -} - -/// Return true if the backend will be able to handle -/// initializing an array of constants like C. 
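-/// For example, the address of an ordinary global can be baked into a
-/// constant initializer, but a thread-local or dllimport-dependent address
-/// is only known at runtime, so it is rejected below.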
-static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
-  if (C->isThreadDependent())
-    return false;
-  if (C->isDLLImportDependent())
-    return false;
-
-  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
-      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
-      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
-    return false;
-
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
-    if (!CE->isGEPWithNoNotionalOverIndexing())
-      return false;
-    if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
-      return false;
-  }
-
-  if (!TTI.shouldBuildLookupTablesForConstant(C))
-    return false;
-
-  return true;
-}
-
-/// If V is a Constant, return it. Otherwise, try to look up
-/// its constant value in ConstantPool, returning 0 if it's not there.
-static Constant *
-LookupConstant(Value *V,
-               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
-  if (Constant *C = dyn_cast<Constant>(V))
-    return C;
-  return ConstantPool.lookup(V);
-}
-
-/// Try to fold instruction I into a constant. This works for
-/// simple instructions such as binary operations where both operands are
-/// constant or can be replaced by constants from the ConstantPool. Returns the
-/// resulting constant on success, 0 otherwise.
-static Constant *
-ConstantFold(Instruction *I, const DataLayout &DL,
-             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
-  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
-    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
-    if (!A)
-      return nullptr;
-    if (A->isAllOnesValue())
-      return LookupConstant(Select->getTrueValue(), ConstantPool);
-    if (A->isNullValue())
-      return LookupConstant(Select->getFalseValue(), ConstantPool);
-    return nullptr;
-  }
-
-  SmallVector<Constant *, 4> COps;
-  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
-    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
-      COps.push_back(A);
-    else
-      return nullptr;
-  }
-
-  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
-    return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
-                                           COps[1], DL);
-  }
-
-  return ConstantFoldInstOperands(I, COps, DL);
-}
-
-/// Try to determine the resulting constant values in phi nodes
-/// at the common destination basic block, *CommonDest, for one of the case
-/// destinations CaseDest corresponding to value CaseVal (0 for the default
-/// case) of a switch instruction SI.
-static bool
-GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
-               BasicBlock **CommonDest,
-               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
-               const DataLayout &DL, const TargetTransformInfo &TTI) {
-  // The block from which we enter the common destination.
-  BasicBlock *Pred = SI->getParent();
-
-  // If CaseDest is empty except for some side-effect free instructions through
-  // which we can constant-propagate the CaseVal, continue to its successor.
-  SmallDenseMap<Value *, Constant *> ConstantPool;
-  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
-  for (Instruction &I : CaseDest->instructionsWithoutDebug()) {
-    if (I.isTerminator()) {
-      // If the terminator is a simple branch, continue to the next block.
-      if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
-        return false;
-      Pred = CaseDest;
-      CaseDest = I.getSuccessor(0);
-    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
-      // Instruction is side-effect free and constant.
- - // If the instruction has uses outside this block or a phi node slot for - // the block, it is not safe to bypass the instruction since it would then - // no longer dominate all its uses. - for (auto &Use : I.uses()) { - User *User = Use.getUser(); - if (Instruction *I = dyn_cast<Instruction>(User)) - if (I->getParent() == CaseDest) - continue; - if (PHINode *Phi = dyn_cast<PHINode>(User)) - if (Phi->getIncomingBlock(Use) == CaseDest) - continue; - return false; - } - - ConstantPool.insert(std::make_pair(&I, C)); - } else { - break; - } - } - - // If we did not have a CommonDest before, use the current one. - if (!*CommonDest) - *CommonDest = CaseDest; - // If the destination isn't the common one, abort. - if (CaseDest != *CommonDest) - return false; - - // Get the values for this case from phi nodes in the destination block. - for (PHINode &PHI : (*CommonDest)->phis()) { - int Idx = PHI.getBasicBlockIndex(Pred); - if (Idx == -1) - continue; - - Constant *ConstVal = - LookupConstant(PHI.getIncomingValue(Idx), ConstantPool); - if (!ConstVal) - return false; - - // Be conservative about which kinds of constants we support. - if (!ValidLookupTableConstant(ConstVal, TTI)) - return false; - - Res.push_back(std::make_pair(&PHI, ConstVal)); - } - - return Res.size() > 0; -} - -// Helper function used to add CaseVal to the list of cases that generate -// Result. Returns the updated number of cases that generate this result. -static uintptr_t MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { - for (auto &I : UniqueResults) { - if (I.first == Result) { - I.second.push_back(CaseVal); - return I.second.size(); - } - } - UniqueResults.push_back( - std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); - return 1; -} - -// Helper function that initializes a map containing -// results for the PHI node of the common destination block for a switch -// instruction. Returns false if multiple PHI nodes have been found or if -// there is not a common destination block for the switch. -static bool -InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult, const DataLayout &DL, - const TargetTransformInfo &TTI, - uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { - for (auto &I : SI->cases()) { - ConstantInt *CaseVal = I.getCaseValue(); - - // Resulting value at phi nodes for this case value. - SwitchCaseResultsTy Results; - if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, - DL, TTI)) - return false; - - // Only one value per case is permitted. - if (Results.size() > 1) - return false; - - // Add the case->result mapping to UniqueResults. - const uintptr_t NumCasesForResult = - MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); - - // Early out if there are too many cases for this result. - if (NumCasesForResult > MaxCasesPerResult) - return false; - - // Early out if there are too many unique results. - if (UniqueResults.size() > MaxUniqueResults) - return false; - - // Check the PHI consistency. - if (!PHI) - PHI = Results[0].first; - else if (PHI != Results[0].first) - return false; - } - // Find the default result value. 
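-  // Note that the default destination is allowed to produce no constant
-  // result, but only when it is unreachable; the check below enforces this.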
-  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
-  BasicBlock *DefaultDest = SI->getDefaultDest();
-  GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
-                 DL, TTI);
-  // If the default value is not found, abort unless the default destination
-  // is unreachable.
-  DefaultResult =
-      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
-  if ((!DefaultResult &&
-       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
-    return false;
-
-  return true;
-}
-
-// Helper function that checks if it is possible to transform a switch with only
-// two cases (or two cases + default) that produces a result into a select.
-// Example:
-// switch (a) {
-// case 10:                %0 = icmp eq i32 %a, 10
-//   return 10;            %1 = select i1 %0, i32 10, i32 4
-// case 20:        ---->   %2 = icmp eq i32 %a, 20
-//   return 2;             %3 = select i1 %2, i32 2, i32 %1
-// default:
-//   return 4;
-// }
-static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
-                                   Constant *DefaultResult, Value *Condition,
-                                   IRBuilder<> &Builder) {
-  assert(ResultVector.size() == 2 &&
-         "We should have exactly two unique results at this point");
-  // If we are selecting between only two cases transform into a simple
-  // select or a two-way select if default is possible.
-  if (ResultVector[0].second.size() == 1 &&
-      ResultVector[1].second.size() == 1) {
-    ConstantInt *const FirstCase = ResultVector[0].second[0];
-    ConstantInt *const SecondCase = ResultVector[1].second[0];
-
-    bool DefaultCanTrigger = DefaultResult;
-    Value *SelectValue = ResultVector[1].first;
-    if (DefaultCanTrigger) {
-      Value *const ValueCompare =
-          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
-      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
-                                         DefaultResult, "switch.select");
-    }
-    Value *const ValueCompare =
-        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
-    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
-                                SelectValue, "switch.select");
-  }
-
-  return nullptr;
-}
-
-// Helper function to clean up a switch instruction that has been converted
-// into a select, fixing up PHI nodes and basic blocks.
-static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
-                                              Value *SelectValue,
-                                              IRBuilder<> &Builder) {
-  BasicBlock *SelectBB = SI->getParent();
-  while (PHI->getBasicBlockIndex(SelectBB) >= 0)
-    PHI->removeIncomingValue(SelectBB);
-  PHI->addIncoming(SelectValue, SelectBB);
-
-  Builder.CreateBr(PHI->getParent());
-
-  // Remove the switch.
-  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
-    BasicBlock *Succ = SI->getSuccessor(i);
-
-    if (Succ == PHI->getParent())
-      continue;
-    Succ->removePredecessor(SelectBB);
-  }
-  SI->eraseFromParent();
-}
-
-/// If the switch is only used to initialize one or more
-/// phi nodes in a common successor block with only two different
-/// constant values, replace the switch with a select.
-static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
-                           const DataLayout &DL,
-                           const TargetTransformInfo &TTI) {
-  Value *const Cond = SI->getCondition();
-  PHINode *PHI = nullptr;
-  BasicBlock *CommonDest = nullptr;
-  Constant *DefaultResult;
-  SwitchCaseResultVectorTy UniqueResults;
-  // Collect all the cases that will deliver the same value from the switch.
-  if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
-                             DL, TTI, 2, 1))
-    return false;
-  // A select chooses between at most two distinct values.
- if (UniqueResults.size() != 2) - return false; - assert(PHI != nullptr && "PHI for value select not found"); - - Builder.SetInsertPoint(SI); - Value *SelectValue = - ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); - if (SelectValue) { - RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder); - return true; - } - // The switch couldn't be converted into a select. - return false; -} - -namespace { - -/// This class represents a lookup table that can be used to replace a switch. -class SwitchLookupTable { -public: - /// Create a lookup table to use as a switch replacement with the contents - /// of Values, using DefaultValue to fill any holes in the table. - SwitchLookupTable( - Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); - - /// Build instructions with Builder to retrieve the value at - /// the position given by Index in the lookup table. - Value *BuildLookup(Value *Index, IRBuilder<> &Builder); - - /// Return true if a table with TableSize elements of - /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - Type *ElementType); - -private: - // Depending on the contents of the table, it can be represented in - // different ways. - enum { - // For tables where each element contains the same value, we just have to - // store that single value and return it for each lookup. - SingleValueKind, - - // For tables where there is a linear relationship between table index - // and values. We calculate the result with a simple multiplication - // and addition instead of a table lookup. - LinearMapKind, - - // For small tables with integer elements, we can pack them into a bitmap - // that fits into a target-legal register. Values are retrieved by - // shift and mask operations. - BitMapKind, - - // The table is stored as an array of values. Values are retrieved by load - // instructions from the table. - ArrayKind - } Kind; - - // For SingleValueKind, this is the single value. - Constant *SingleValue = nullptr; - - // For BitMapKind, this is the bitmap. - ConstantInt *BitMap = nullptr; - IntegerType *BitMapElementTy = nullptr; - - // For LinearMapKind, these are the constants used to derive the value. - ConstantInt *LinearOffset = nullptr; - ConstantInt *LinearMultiplier = nullptr; - - // For ArrayKind, this is the array. - GlobalVariable *Array = nullptr; -}; - -} // end anonymous namespace - -SwitchLookupTable::SwitchLookupTable( - Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { - assert(Values.size() && "Can't build lookup table without values!"); - assert(TableSize >= Values.size() && "Can't fit values in table!"); - - // If all values in the table are equal, this is that value. - SingleValue = Values.begin()->second; - - Type *ValueType = Values.begin()->second->getType(); - - // Build up the table contents. 
-  SmallVector<Constant *, 64> TableContents(TableSize);
-  for (size_t I = 0, E = Values.size(); I != E; ++I) {
-    ConstantInt *CaseVal = Values[I].first;
-    Constant *CaseRes = Values[I].second;
-    assert(CaseRes->getType() == ValueType);
-
-    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
-    TableContents[Idx] = CaseRes;
-
-    if (CaseRes != SingleValue)
-      SingleValue = nullptr;
-  }
-
-  // Fill in any holes in the table with the default result.
-  if (Values.size() < TableSize) {
-    assert(DefaultValue &&
-           "Need a default value to fill the lookup table holes.");
-    assert(DefaultValue->getType() == ValueType);
-    for (uint64_t I = 0; I < TableSize; ++I) {
-      if (!TableContents[I])
-        TableContents[I] = DefaultValue;
-    }
-
-    if (DefaultValue != SingleValue)
-      SingleValue = nullptr;
-  }
-
-  // If each element in the table contains the same value, we only need to
-  // store that single value.
-  if (SingleValue) {
-    Kind = SingleValueKind;
-    return;
-  }
-
-  // Check if we can derive the value with a linear transformation from the
-  // table index.
-  if (isa<IntegerType>(ValueType)) {
-    bool LinearMappingPossible = true;
-    APInt PrevVal;
-    APInt DistToPrev;
-    assert(TableSize >= 2 && "Should be a SingleValue table.");
-    // Check if there is the same distance between two consecutive values.
-    for (uint64_t I = 0; I < TableSize; ++I) {
-      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
-      if (!ConstVal) {
-        // This is an undef. We could deal with it, but undefs in lookup
-        // tables are rare, so it's probably not worth the added complexity.
-        LinearMappingPossible = false;
-        break;
-      }
-      const APInt &Val = ConstVal->getValue();
-      if (I != 0) {
-        APInt Dist = Val - PrevVal;
-        if (I == 1) {
-          DistToPrev = Dist;
-        } else if (Dist != DistToPrev) {
-          LinearMappingPossible = false;
-          break;
-        }
-      }
-      PrevVal = Val;
-    }
-    if (LinearMappingPossible) {
-      LinearOffset = cast<ConstantInt>(TableContents[0]);
-      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
-      Kind = LinearMapKind;
-      ++NumLinearMaps;
-      return;
-    }
-  }
-
-  // If the type is integer and the table fits in a register, build a bitmap.
-  if (WouldFitInRegister(DL, TableSize, ValueType)) {
-    IntegerType *IT = cast<IntegerType>(ValueType);
-    APInt TableInt(TableSize * IT->getBitWidth(), 0);
-    for (uint64_t I = TableSize; I > 0; --I) {
-      TableInt <<= IT->getBitWidth();
-      // Insert values into the bitmap. Undef values are set to zero.
-      if (!isa<UndefValue>(TableContents[I - 1])) {
-        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
-        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
-      }
-    }
-    BitMap = ConstantInt::get(M.getContext(), TableInt);
-    BitMapElementTy = IT;
-    Kind = BitMapKind;
-    ++NumBitMaps;
-    return;
-  }
-
-  // Store the table in an array.
-  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
-  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
-
-  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
-                             GlobalVariable::PrivateLinkage, Initializer,
-                             "switch.table." + FuncName);
-  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-  // Set the alignment to that of an array element. We will only be loading
-  // one value out of it.
-  Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
-  Kind = ArrayKind;
-}
-
-Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
-  switch (Kind) {
-  case SingleValueKind:
-    return SingleValue;
-  case LinearMapKind: {
-    // Derive the result value from the input value.
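-    // That is: Result = LinearOffset + Index * LinearMultiplier, with the
-    // multiply skipped when the multiplier is one and the add skipped when
-    // the offset is zero.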
- Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), - false, "switch.idx.cast"); - if (!LinearMultiplier->isOne()) - Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); - if (!LinearOffset->isZero()) - Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); - return Result; - } - case BitMapKind: { - // Type of the bitmap (e.g. i59). - IntegerType *MapTy = BitMap->getType(); - - // Cast Index to the same type as the bitmap. - // Note: The Index is <= the number of elements in the table, so - // truncating it to the width of the bitmask is safe. - Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); - - // Multiply the shift amount by the element width. - ShiftAmt = Builder.CreateMul( - ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), - "switch.shiftamt"); - - // Shift down. - Value *DownShifted = - Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift"); - // Mask off. - return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); - } - case ArrayKind: { - // Make sure the table index will not overflow when treated as signed. - IntegerType *IT = cast<IntegerType>(Index->getType()); - uint64_t TableSize = - Array->getInitializer()->getType()->getArrayNumElements(); - if (TableSize > (1ULL << (IT->getBitWidth() - 1))) - Index = Builder.CreateZExt( - Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), - "switch.tableidx.zext"); - - Value *GEPIndices[] = {Builder.getInt32(0), Index}; - Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, - GEPIndices, "switch.gep"); - return Builder.CreateLoad( - cast<ArrayType>(Array->getValueType())->getElementType(), GEP, - "switch.load"); - } - } - llvm_unreachable("Unknown lookup table kind!"); -} - -bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, - uint64_t TableSize, - Type *ElementType) { - auto *IT = dyn_cast<IntegerType>(ElementType); - if (!IT) - return false; - // FIXME: If the type is wider than it needs to be, e.g. i8 but all values - // are <= 15, we could try to narrow the type. - - // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. - if (TableSize >= UINT_MAX / IT->getBitWidth()) - return false; - return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); -} - -/// Determine whether a lookup table should be built for this switch, based on -/// the number of cases, size of the table, and the types of the results. -static bool -ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, - const TargetTransformInfo &TTI, const DataLayout &DL, - const SmallDenseMap<PHINode *, Type *> &ResultTypes) { - if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) - return false; // TableSize overflowed, or mul below might overflow. - - bool AllTablesFitInRegister = true; - bool HasIllegalType = false; - for (const auto &I : ResultTypes) { - Type *Ty = I.second; - - // Saturate this flag to true. - HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); - - // Saturate this flag to false. - AllTablesFitInRegister = - AllTablesFitInRegister && - SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); - - // If both flags saturate, we're done. NOTE: This *only* works with - // saturating flags, and all flags have to saturate first due to the - // non-deterministic behavior of iterating over a dense map. - if (HasIllegalType && !AllTablesFitInRegister) - break; - } - - // If each table would fit in a register, we should build it anyway. 
- if (AllTablesFitInRegister) - return true; - - // Don't build a table that doesn't fit in-register if it has illegal types. - if (HasIllegalType) - return false; - - // The table density should be at least 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - return SI->getNumCases() * 10 >= TableSize * 4; -} - -/// Try to reuse the switch table index compare. Following pattern: -/// \code -/// if (idx < tablesize) -/// r = table[idx]; // table does not contain default_value -/// else -/// r = default_value; -/// if (r != default_value) -/// ... -/// \endcode -/// Is optimized to: -/// \code -/// cond = idx < tablesize; -/// if (cond) -/// r = table[idx]; -/// else -/// r = default_value; -/// if (cond) -/// ... -/// \endcode -/// Jump threading will then eliminate the second if(cond). -static void reuseTableCompare( - User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, - Constant *DefaultValue, - const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { - ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); - if (!CmpInst) - return; - - // We require that the compare is in the same block as the phi so that jump - // threading can do its work afterwards. - if (CmpInst->getParent() != PhiBlock) - return; - - Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1)); - if (!CmpOp1) - return; - - Value *RangeCmp = RangeCheckBranch->getCondition(); - Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); - Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); - - // Check if the compare with the default value is constant true or false. - Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), - DefaultValue, CmpOp1, true); - if (DefaultConst != TrueConst && DefaultConst != FalseConst) - return; - - // Check if the compare with the case values is distinct from the default - // compare result. - for (auto ValuePair : Values) { - Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), - ValuePair.second, CmpOp1, true); - if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst)) - return; - assert((CaseConst == TrueConst || CaseConst == FalseConst) && - "Expect true or false as compare result."); - } - - // Check if the branch instruction dominates the phi node. It's a simple - // dominance check, but sufficient for our needs. - // Although this check is invariant in the calling loops, it's better to do it - // at this late stage. Practically we do it at most once for a switch. - BasicBlock *BranchBlock = RangeCheckBranch->getParent(); - for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) { - BasicBlock *Pred = *PI; - if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) - return; - } - - if (DefaultConst == FalseConst) { - // The compare yields the same result. We can replace it. - CmpInst->replaceAllUsesWith(RangeCmp); - ++NumTableCmpReuses; - } else { - // The compare yields the same result, just inverted. We can replace it. - Value *InvertedTableCmp = BinaryOperator::CreateXor( - RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", - RangeCheckBranch); - CmpInst->replaceAllUsesWith(InvertedTableCmp); - ++NumTableCmpReuses; - } -} - -/// If the switch is only used to initialize one or more phi nodes in a common -/// successor block with different constant values, replace the switch with -/// lookup tables. 
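-/// For example (an illustrative sketch), a switch on %n with cases 0, 1, 2
-/// whose destinations feed a common phi with the constants 5, 10, 15 can be
-/// replaced by one table lookup; here the "table" even degenerates into
-/// arithmetic (LinearMapKind): result = 5 + 5 * %n.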
-static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
-                                const DataLayout &DL,
-                                const TargetTransformInfo &TTI) {
-  assert(SI->getNumCases() > 1 && "Degenerate switch?");
-
-  Function *Fn = SI->getParent()->getParent();
-  // Only build a lookup table when the target supports it and the
-  // "no-jump-tables" attribute is not set.
-  if (!TTI.shouldBuildLookupTables() ||
-      (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
-    return false;
-
-  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
-  // split off a dense part and build a lookup table for that.
-
-  // FIXME: This creates arrays of GEPs to constant strings, which means each
-  // GEP needs a runtime relocation in PIC code. We should just build one big
-  // string and lookup indices into that.
-
-  // Ignore switches with fewer than three cases. Lookup tables will not make
-  // them faster, so we don't analyze them.
-  if (SI->getNumCases() < 3)
-    return false;
-
-  // Figure out the corresponding result for each case value and phi node in
-  // the common destination, as well as the min and max case values.
-  assert(!empty(SI->cases()));
-  SwitchInst::CaseIt CI = SI->case_begin();
-  ConstantInt *MinCaseVal = CI->getCaseValue();
-  ConstantInt *MaxCaseVal = CI->getCaseValue();
-
-  BasicBlock *CommonDest = nullptr;
-
-  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
-  SmallDenseMap<PHINode *, ResultListTy> ResultLists;
-
-  SmallDenseMap<PHINode *, Constant *> DefaultResults;
-  SmallDenseMap<PHINode *, Type *> ResultTypes;
-  SmallVector<PHINode *, 4> PHIs;
-
-  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
-    ConstantInt *CaseVal = CI->getCaseValue();
-    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
-      MinCaseVal = CaseVal;
-    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
-      MaxCaseVal = CaseVal;
-
-    // Resulting value at phi nodes for this case value.
-    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
-    ResultsTy Results;
-    if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
-                        Results, DL, TTI))
-      return false;
-
-    // Append the result from this case to the list for each phi.
-    for (const auto &I : Results) {
-      PHINode *PHI = I.first;
-      Constant *Value = I.second;
-      if (!ResultLists.count(PHI))
-        PHIs.push_back(PHI);
-      ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
-    }
-  }
-
-  // Keep track of the result types.
-  for (PHINode *PHI : PHIs) {
-    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
-  }
-
-  uint64_t NumResults = ResultLists[PHIs[0]].size();
-  APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
-  uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
-  bool TableHasHoles = (NumResults < TableSize);
-
-  // If the table has holes, we need a constant result for the default case
-  // or a bitmask that fits in a register.
-  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
-  bool HasDefaultResults =
-      GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
-                     DefaultResultsList, DL, TTI);
-
-  bool NeedMask = (TableHasHoles && !HasDefaultResults);
-  if (NeedMask) {
-    // As an extra penalty for the validity test we require more cases.
-    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
-      return false;
-    if (!DL.fitsInLegalInteger(TableSize))
-      return false;
-  }
-
-  for (const auto &I : DefaultResultsList) {
-    PHINode *PHI = I.first;
-    Constant *Result = I.second;
-    DefaultResults[PHI] = Result;
-  }
-
-  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
-    return false;
-
-  // Create the BB that does the lookups.
-  Module &Mod = *CommonDest->getParent()->getParent();
-  BasicBlock *LookupBB = BasicBlock::Create(
-      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
-
-  // Compute the table index value.
-  Builder.SetInsertPoint(SI);
-  Value *TableIndex;
-  if (MinCaseVal->isNullValue())
-    TableIndex = SI->getCondition();
-  else
-    TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
-                                   "switch.tableidx");
-
-  // Compute the maximum table size representable by the integer type we are
-  // switching upon.
-  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
-  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
-  assert(MaxTableSize >= TableSize &&
-         "It is impossible for a switch to have more entries than the max "
-         "representable value of its input integer type's size.");
-
-  // If the default destination is unreachable, or if the lookup table covers
-  // all values of the conditional variable, branch directly to the lookup
-  // table BB. Otherwise, check that the condition is within the case range.
-  const bool DefaultIsReachable =
-      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
-  BranchInst *RangeCheckBranch = nullptr;
-
-  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
-    Builder.CreateBr(LookupBB);
-    // Note: We call removePredecessor later since we need to be able to get
-    // the PHI value for the default case in case we're using a bit mask.
-  } else {
-    Value *Cmp = Builder.CreateICmpULT(
-        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
-    RangeCheckBranch =
-        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
-  }
-
-  // Populate the BB that does the lookups.
-  Builder.SetInsertPoint(LookupBB);
-
-  if (NeedMask) {
-    // Before doing the lookup, we do the hole check. The LookupBB is therefore
-    // re-purposed to do the hole check, and we create a new LookupBB.
-    BasicBlock *MaskBB = LookupBB;
-    MaskBB->setName("switch.hole_check");
-    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
-                                  CommonDest->getParent(), CommonDest);
-
-    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
-    // unnecessary illegal types.
-    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
-    APInt MaskInt(TableSizePowOf2, 0);
-    APInt One(TableSizePowOf2, 1);
-    // Build bitmask; fill in a 1 bit for every case.
-    const ResultListTy &ResultList = ResultLists[PHIs[0]];
-    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
-      uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
-                         .getLimitedValue();
-      MaskInt |= One << Idx;
-    }
-    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
-
-    // Get the TableIndex'th bit of the bitmask.
-    // If this bit is 0 (meaning hole) jump to the default destination,
-    // else continue with table lookup.
- IntegerType *MapTy = TableMask->getType(); - Value *MaskIndex = - Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); - Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); - Value *LoBit = Builder.CreateTrunc( - Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); - Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); - - Builder.SetInsertPoint(LookupBB); - AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); - } - - if (!DefaultIsReachable || GeneratingCoveredLookupTable) { - // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, - // do not delete PHINodes here. - SI->getDefaultDest()->removePredecessor(SI->getParent(), - /*KeepOneInputPHIs=*/true); - } - - bool ReturnedEarly = false; - for (PHINode *PHI : PHIs) { - const ResultListTy &ResultList = ResultLists[PHI]; - - // If using a bitmask, use any value to fill the lookup table holes. - Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - StringRef FuncName = Fn->getName(); - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, - FuncName); - - Value *Result = Table.BuildLookup(TableIndex, Builder); - - // If the result is used to return immediately from the function, we want to - // do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) && - PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) { - Builder.CreateRet(Result); - ReturnedEarly = true; - break; - } - - // Do a small peephole optimization: re-use the switch table compare if - // possible. - if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { - BasicBlock *PhiBlock = PHI->getParent(); - // Search for compare instructions which use the phi. - for (auto *User : PHI->users()) { - reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); - } - } - - PHI->addIncoming(Result, LookupBB); - } - - if (!ReturnedEarly) - Builder.CreateBr(CommonDest); - - // Remove the switch. - for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { - BasicBlock *Succ = SI->getSuccessor(i); - - if (Succ == SI->getDefaultDest()) - continue; - Succ->removePredecessor(SI->getParent()); - } - SI->eraseFromParent(); - - ++NumLookupTables; - if (NeedMask) - ++NumLookupTablesHoles; - return true; -} - -static bool isSwitchDense(ArrayRef<int64_t> Values) { - // See also SelectionDAGBuilder::isDense(), which this function was based on. - uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); - uint64_t Range = Diff + 1; - uint64_t NumCases = Values.size(); - // 40% is the default density for building a jump table in optsize/minsize mode. - uint64_t MinDensity = 40; - - return NumCases * 100 >= Range * MinDensity; -} - -/// Try to transform a switch that has "holes" in it to a contiguous sequence -/// of cases. -/// -/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be -/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. -/// -/// This converts a sparse switch into a dense switch which allows better -/// lowering and could also allow transforming into a lookup table. 
-static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
-                              const DataLayout &DL,
-                              const TargetTransformInfo &TTI) {
-  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
-  if (CondTy->getIntegerBitWidth() > 64 ||
-      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
-    return false;
-  // Only bother with this optimization if there are more than 3 switch cases;
-  // SDAG will only bother creating jump tables for 4 or more cases.
-  if (SI->getNumCases() < 4)
-    return false;
-
-  // This transform is agnostic to the signedness of the input or case values. We
-  // can treat the case values as signed or unsigned. We can optimize more common
-  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
-  // as signed.
-  SmallVector<int64_t,4> Values;
-  for (auto &C : SI->cases())
-    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
-  llvm::sort(Values);
-
-  // If the switch is already dense, there's nothing useful to do here.
-  if (isSwitchDense(Values))
-    return false;
-
-  // First, transform the values such that they start at zero and ascend.
-  int64_t Base = Values[0];
-  for (auto &V : Values)
-    V -= (uint64_t)(Base);
-
-  // Now we have signed numbers that have been shifted so that, given enough
-  // precision, there are no negative values. Since the rest of the transform
-  // is bitwise only, we switch now to an unsigned representation.
-
-  // This transform can be done speculatively because it is so cheap - it
-  // results in a single rotate operation being inserted.
-  // FIXME: It's possible that optimizing a switch on powers of two might also
-  // be beneficial - flag values are often powers of two and we could use a CLZ
-  // as the key function.
-
-  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
-  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
-  // less than 64.
-  unsigned Shift = 64;
-  for (auto &V : Values)
-    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
-  assert(Shift < 64);
-  if (Shift > 0)
-    for (auto &V : Values)
-      V = (int64_t)((uint64_t)V >> Shift);
-
-  if (!isSwitchDense(Values))
-    // Transform didn't create a dense switch.
-    return false;
-
-  // The obvious transform is to shift the switch condition right and emit a
-  // check that the condition divides cleanly by the GCD of the case values,
-  // i.e. (C & ((1 << Shift) - 1)) == 0, inserting a new CFG edge to handle
-  // the case where it didn't divide cleanly.
-  //
-  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
-  // shift and puts the shifted-off bits in the uppermost bits. If any of these
-  // are nonzero then the switch condition will be very large and will hit the
-  // default case.
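A minimal standalone sketch of the ROTR trick just described (plain C++ rather than IR; the case set {5, 9, 13, 17} is borrowed from the comment above ReduceSwitchRange):

#include <cstdint>
#include <cstdio>

// Rotate a 64-bit value right by Shift; 0 < Shift < 64 is assumed, as the
// transform guarantees for the values it feeds in.
static uint64_t rotr64(uint64_t V, unsigned Shift) {
  return (V >> Shift) | (V << (64 - Shift));
}

int main() {
  const uint64_t Base = 5;  // smallest case value
  const unsigned Shift = 2; // every (case - Base) is a multiple of 4
  const uint64_t Cases[] = {5, 9, 13, 17, 6}; // 6 is a non-case probe
  for (uint64_t C : Cases) {
    uint64_t Key = rotr64(C - Base, Shift);
    // 5, 9, 13, 17 map to the dense keys 0, 1, 2, 3; the probe value 6
    // rotates its low set bit into the top of the word, so its key is
    // astronomically large and falls through to the default case.
    printf("%2llu -> %llu\n", (unsigned long long)C,
           (unsigned long long)Key);
  }
  return 0;
}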
- - auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); - Builder.SetInsertPoint(SI); - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); - auto *LShr = Builder.CreateLShr(Sub, ShiftC); - auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); - auto *Rot = Builder.CreateOr(LShr, Shl); - SI->replaceUsesOfWith(SI->getCondition(), Rot); - - for (auto Case : SI->cases()) { - auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); - Case.setValue( - cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); - } - return true; -} - -bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - BasicBlock *BB = SI->getParent(); - - if (isValueEqualityComparison(SI)) { - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return requestResimplify(); - - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) - return requestResimplify(); - - // If the block only contains the switch, see if we can fold the block - // away into any preds. - if (SI == &*BB->instructionsWithoutDebug().begin()) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return requestResimplify(); - } - - // Try to transform the switch into an icmp and a branch. - if (TurnSwitchRangeIntoICmp(SI, Builder)) - return requestResimplify(); - - // Remove unreachable cases. - if (eliminateDeadSwitchCases(SI, Options.AC, DL)) - return requestResimplify(); - - if (switchToSelect(SI, Builder, DL, TTI)) - return requestResimplify(); - - if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) - return requestResimplify(); - - // The conversion from switch to lookup tables results in difficult-to-analyze - // code and makes pruning branches much harder. This is a problem if the - // switch expression itself can still be restricted as a result of inlining or - // CVP. Therefore, only apply this transformation during late stages of the - // optimisation pipeline. - if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DL, TTI)) - return requestResimplify(); - - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - - return false; -} - -bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { - BasicBlock *BB = IBI->getParent(); - bool Changed = false; - - // Eliminate redundant destinations. - SmallPtrSet<Value *, 8> Succs; - for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - BasicBlock *Dest = IBI->getDestination(i); - if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { - Dest->removePredecessor(BB); - IBI->removeDestination(i); - --i; - --e; - Changed = true; - } - } - - if (IBI->getNumDestinations() == 0) { - // If the indirectbr has no successors, change it to unreachable. - new UnreachableInst(IBI->getContext(), IBI); - EraseTerminatorAndDCECond(IBI); - return true; - } - - if (IBI->getNumDestinations() == 1) { - // If the indirectbr has one successor, change it to a direct branch. 
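As a source-level illustration of the single-destination case handled next (hypothetical snippet; it uses the GNU computed-goto extension, which is what typically produces an indirectbr in the first place):

// Hypothetical source that lowers to an indirectbr with one destination:
// once the only possible target is known, the indirect jump is no better
// than a plain unconditional branch, which is what the code below emits.
void one_target(void) {
  void *Dest = &&only; // GNU extension; the sole address ever taken
  goto *Dest;          // indirectbr %Dest, [label %only]
only:
  return;
}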
-    BranchInst::Create(IBI->getDestination(0), IBI);
-    EraseTerminatorAndDCECond(IBI);
-    return true;
-  }
-
-  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
-    if (SimplifyIndirectBrOnSelect(IBI, SI))
-      return requestResimplify();
-  }
-  return Changed;
-}
-
-/// Given a block with only a single landing pad and an unconditional branch,
-/// try to find another basic block which this one can be merged with. This
-/// handles cases where we have multiple invokes with unique landing pads, but
-/// a shared handler.
-///
-/// We specifically choose to not worry about merging non-empty blocks
-/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
-/// practice, the optimizer produces empty landing pad blocks quite frequently
-/// when dealing with exception dense code. (see: instcombine, gvn, if-else
-/// sinking in this file)
-///
-/// This is primarily a code size optimization. We need to avoid performing
-/// any transform which might inhibit optimization (such as our ability to
-/// specialize a particular handler via tail commoning). We do this by not
-/// merging any blocks which require us to introduce a phi. Since the same
-/// values are flowing through both blocks, we don't lose any ability to
-/// specialize. If anything, we make such specialization more likely.
-///
-/// TODO - This transformation could remove entries from a phi in the target
-/// block when the inputs in the phi are the same for the two blocks being
-/// merged. In some cases, this could result in removal of the PHI entirely.
-static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
-                                 BasicBlock *BB) {
-  auto Succ = BB->getUniqueSuccessor();
-  assert(Succ);
-  // If there's a phi in the successor block, we'd likely have to introduce
-  // a phi into the merged landing pad block.
-  if (isa<PHINode>(*Succ->begin()))
-    return false;
-
-  for (BasicBlock *OtherPred : predecessors(Succ)) {
-    if (BB == OtherPred)
-      continue;
-    BasicBlock::iterator I = OtherPred->begin();
-    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
-    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
-      continue;
-    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
-      ;
-    BranchInst *BI2 = dyn_cast<BranchInst>(I);
-    if (!BI2 || !BI2->isIdenticalTo(BI))
-      continue;
-
-    // We've found an identical block. Update our predecessors to take that
-    // path instead and make ourselves dead.
-    SmallPtrSet<BasicBlock *, 16> Preds;
-    Preds.insert(pred_begin(BB), pred_end(BB));
-    for (BasicBlock *Pred : Preds) {
-      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
-      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
-             "unexpected successor");
-      II->setUnwindDest(OtherPred);
-    }
-
-    // The debug info in OtherPred doesn't cover the merged control flow that
-    // used to go through BB. We need to delete it or update it.
-    for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
-      Instruction &Inst = *I;
-      I++;
-      if (isa<DbgInfoIntrinsic>(Inst))
-        Inst.eraseFromParent();
-    }
-
-    SmallPtrSet<BasicBlock *, 16> Succs;
-    Succs.insert(succ_begin(BB), succ_end(BB));
-    for (BasicBlock *Succ : Succs) {
-      Succ->removePredecessor(BB);
-    }
-
-    IRBuilder<> Builder(BI);
-    Builder.CreateUnreachable();
-    BI->eraseFromParent();
-    return true;
-  }
-  return false;
-}
-
-bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
-                                          IRBuilder<> &Builder) {
-  BasicBlock *BB = BI->getParent();
-  BasicBlock *Succ = BI->getSuccessor(0);
-
-  // If the Terminator is the only non-phi instruction, simplify the block.
-  // If LoopHeader is provided, check if the block or its successor is a loop
-  // header. (This is for early invocations before loop simplify and
-  // vectorization to keep canonical loop forms for nested loops. These blocks
-  // can be eliminated when the pass is invoked later in the back-end.)
-  // Note that if BB has only one predecessor then we do not introduce a new
-  // backedge, so we can eliminate BB.
-  bool NeedCanonicalLoop =
-      Options.NeedCanonicalLoop &&
-      (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
-       (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
-  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
-  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
-      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
-    return true;
-
-  // If the only instruction in the block is a seteq/setne comparison against a
-  // constant, try to simplify the block.
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
-    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
-      for (++I; isa<DbgInfoIntrinsic>(I); ++I)
-        ;
-      if (I->isTerminator() &&
-          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
-        return true;
-    }
-
-  // See if we can merge an empty landing pad block with another which is
-  // equivalent.
-  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
-    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
-      ;
-    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
-      return true;
-  }
-
-  // If this basic block is ONLY a compare and a branch, and if a predecessor
-  // branches to us and our successor, fold the comparison into the
-  // predecessor and use logical operations to update the incoming value
-  // for PHI nodes in the common successor.
-  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
-    return requestResimplify();
-  return false;
-}
-
-static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
-  BasicBlock *PredPred = nullptr;
-  for (auto *P : predecessors(BB)) {
-    BasicBlock *PPred = P->getSinglePredecessor();
-    if (!PPred || (PredPred && PredPred != PPred))
-      return nullptr;
-    PredPred = PPred;
-  }
-  return PredPred;
-}
-
-bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
-  BasicBlock *BB = BI->getParent();
-  const Function *Fn = BB->getParent();
-  if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
-    return false;
-
-  // Conditional branch
-  if (isValueEqualityComparison(BI)) {
-    // If we only have one predecessor, and if it is a branch on this value,
-    // see if that predecessor totally determines the outcome of this
-    // switch.
-    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
-        return requestResimplify();
-
-    // This block must be empty, except for the setcond inst, if it exists.
-    // Ignore dbg intrinsics.
-    auto I = BB->instructionsWithoutDebug().begin();
-    if (&*I == BI) {
-      if (FoldValueComparisonIntoPredecessors(BI, Builder))
-        return requestResimplify();
-    } else if (&*I == cast<Instruction>(BI->getCondition())) {
-      ++I;
-      if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
-        return requestResimplify();
-    }
-  }
-
-  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
-  if (SimplifyBranchOnICmpChain(BI, Builder, DL))
-    return true;
-
-  // If this basic block has dominating predecessor blocks and the dominating
-  // blocks' conditions imply BI's condition, we know the direction of BI.
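A source-level illustration of the dominating-condition fold that follows (hypothetical example; the constants are arbitrary):

// Hypothetical source: the dominating test "X > 10" already decides the
// nested test "X > 5", so the inner branch folds to a constant-true
// branch and the dead arm disappears.
bool implied(int X) {
  if (X > 10) {
    if (X > 5) // implied by the dominating condition; becomes "if (true)"
      return true;
  }
  return false;
}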
- Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); - if (Imp) { - // Turn this into a branch on constant. - auto *OldCond = BI->getCondition(); - ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext()) - : ConstantInt::getFalse(BB->getContext()); - BI->setCondition(TorF); - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return requestResimplify(); - } - - // If this basic block is ONLY a compare and a branch, and if a predecessor - // branches to us and one of our successors, fold the comparison into the - // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) - return requestResimplify(); - - // We have a conditional branch to two blocks that are only reachable - // from BI. We know that the condbr dominates the two blocks, so see if - // there is any identical code in the "then" and "else" blocks. If so, we - // can hoist it up to the branching block. - if (BI->getSuccessor(0)->getSinglePredecessor()) { - if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); - } else { - // If Successor #1 has multiple preds, we may be able to conditionally - // execute Successor #0 if it branches to Successor #1. - Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator(); - if (Succ0TI->getNumSuccessors() == 1 && - Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return requestResimplify(); - } - } else if (BI->getSuccessor(1)->getSinglePredecessor()) { - // If Successor #0 has multiple preds, we may be able to conditionally - // execute Successor #1 if it branches to Successor #0. - Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator(); - if (Succ1TI->getNumSuccessors() == 1 && - Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return requestResimplify(); - } - - // If this is a branch on a phi node in the current block, thread control - // through this block if any PHI node entries are constants. - if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) - if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, Options.AC)) - return requestResimplify(); - - // Scan predecessor blocks for conditional branches. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) - return requestResimplify(); - - // Look for diamond patterns. - if (MergeCondStores) - if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) - if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL)) - return requestResimplify(); - - return false; -} - -/// Check if passing a value to an instruction will cause undefined behavior. -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { - Constant *C = dyn_cast<Constant>(V); - if (!C) - return false; - - if (I->use_empty()) - return false; - - if (C->isNullValue() || isa<UndefValue>(C)) { - // Only look at the first use, avoid hurting compile time with long uselists - User *Use = *I->user_begin(); - - // Now make sure that there are no instructions in between that can alter - // control flow (eg. 
calls)
-    for (BasicBlock::iterator
-             i = ++BasicBlock::iterator(I),
-             UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
-         i != UI; ++i)
-      if (i == I->getParent()->end() || i->mayHaveSideEffects())
-        return false;
-
-    // Look through GEPs. A load from a GEP derived from NULL is still undefined
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
-      if (GEP->getPointerOperand() == I)
-        return passingValueIsAlwaysUndefined(V, GEP);
-
-    // Look through bitcasts.
-    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
-      return passingValueIsAlwaysUndefined(V, BC);
-
-    // Load from null is undefined.
-    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
-      if (!LI->isVolatile())
-        return !NullPointerIsDefined(LI->getFunction(),
-                                     LI->getPointerAddressSpace());
-
-    // Store to null is undefined.
-    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
-      if (!SI->isVolatile())
-        return (!NullPointerIsDefined(SI->getFunction(),
-                                      SI->getPointerAddressSpace())) &&
-               SI->getPointerOperand() == I;
-
-    // A call to null is undefined.
-    if (auto CS = CallSite(Use))
-      return !NullPointerIsDefined(CS->getFunction()) &&
-             CS.getCalledValue() == I;
-  }
-  return false;
-}
-
-/// If BB has an incoming value that will always trigger undefined behavior
-/// (eg. null pointer dereference), remove the branch leading here.
-static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
-  for (PHINode &PHI : BB->phis())
-    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
-      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
-        Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
-        IRBuilder<> Builder(T);
-        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
-          BB->removePredecessor(PHI.getIncomingBlock(i));
-          // Turn unconditional branches into unreachables and remove the dead
-          // destination from conditional branches.
-          if (BI->isUnconditional())
-            Builder.CreateUnreachable();
-          else
-            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
-                                                       : BI->getSuccessor(0));
-          BI->eraseFromParent();
-          return true;
-        }
-        // TODO: SwitchInst.
-      }
-
-  return false;
-}
-
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
-  bool Changed = false;
-
-  assert(BB && BB->getParent() && "Block not embedded in function!");
-  assert(BB->getTerminator() && "Degenerate basic block encountered!");
-
-  // Remove basic blocks that have no predecessors (except the entry block)...
-  // or that just have themselves as a predecessor. These are unreachable.
-  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
-      BB->getSinglePredecessor() == BB) {
-    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
-    DeleteDeadBlock(BB);
-    return true;
-  }
-
-  // Check to see if we can constant propagate this terminator instruction
-  // away...
-  Changed |= ConstantFoldTerminator(BB, true);
-
-  // Check for and eliminate duplicate PHI nodes in this block.
-  Changed |= EliminateDuplicatePHINodes(BB);
-
-  // Check for and remove branches that will always cause undefined behavior.
-  Changed |= removeUndefIntroducingPredecessor(BB);
-
-  // Merge basic blocks into their predecessor if there is only one distinct
-  // pred, and if there is only one distinct successor of the predecessor, and
-  // if there are no PHI nodes.
-  if (MergeBlockIntoPredecessor(BB))
-    return true;
-
-  if (SinkCommon && Options.SinkCommonInsts)
-    Changed |= SinkCommonCodeFromPredecessors(BB);
-
-  IRBuilder<> Builder(BB);
-
-  // If there is a trivial two-entry PHI node in this basic block, and we can
-  // eliminate it, do so now.
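A source-level sketch of the shape FoldTwoEntryPHINode targets (hypothetical example):

// Hypothetical source: the two-entry phi merging A and B behind a
// conditional branch is the IR form of this ?: expression, and folding
// the phi turns the if/else diamond back into a single block with a select.
int two_entry_phi(bool Cond, int A, int B) {
  int R;
  if (Cond)
    R = A;  // incoming value from the "then" block
  else
    R = B;  // incoming value from the "else" block
  return R; // %r = phi [A, %then], [B, %else]  ->  select Cond, A, B
}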
-  if (auto *PN = dyn_cast<PHINode>(BB->begin()))
-    if (PN->getNumIncomingValues() == 2)
-      Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
-
-  Builder.SetInsertPoint(BB->getTerminator());
-  if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-    if (BI->isUnconditional()) {
-      if (SimplifyUncondBranch(BI, Builder))
-        return true;
-    } else {
-      if (SimplifyCondBranch(BI, Builder))
-        return true;
-    }
-  } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
-    if (SimplifyReturn(RI, Builder))
-      return true;
-  } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
-    if (SimplifyResume(RI, Builder))
-      return true;
-  } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
-    if (SimplifyCleanupReturn(RI))
-      return true;
-  } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
-    if (SimplifySwitch(SI, Builder))
-      return true;
-  } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) {
-    if (SimplifyUnreachable(UI))
-      return true;
-  } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) {
-    if (SimplifyIndirectBr(IBI))
-      return true;
-  }
-
-  return Changed;
-}
-
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
-  bool Changed = false;
-
-  // Repeatedly simplify BB as long as resimplification is requested.
-  do {
-    Resimplify = false;
-
-    // Perform one round of simplification. Resimplify flag will be set if
-    // another iteration is requested.
-    Changed |= simplifyOnce(BB);
-  } while (Resimplify);
-
-  return Changed;
-}
-
-bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
-                       const SimplifyCFGOptions &Options,
-                       SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
-  return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
-                        Options)
-      .run(BB);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
deleted file mode 100644
index cbb114f9a47a..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ /dev/null
@@ -1,957 +0,0 @@
-//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements induction variable simplification. It does
-// not define any actual pass or policy, but provides a single function to
-// simplify a loop's induction variables based on ScalarEvolution.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "indvars"
-
-STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
-STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
-STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
-STATISTIC(NumElimRem, "Number of IV remainder operations eliminated");
-STATISTIC(
-    NumSimplifiedSDiv,
-    "Number of IV signed division operations converted to unsigned division");
-STATISTIC(
-    NumSimplifiedSRem,
-    "Number of IV signed remainder operations converted to unsigned remainder");
-STATISTIC(NumElimCmp, "Number of IV comparisons eliminated");
-
-namespace {
-  /// This is a utility for simplifying induction variables
-  /// based on ScalarEvolution. It is the primary instrument of the
-  /// IndvarSimplify pass, but it may also be directly invoked to clean up after
-  /// other loop passes that preserve SCEV.
-  class SimplifyIndvar {
-    Loop *L;
-    LoopInfo *LI;
-    ScalarEvolution *SE;
-    DominatorTree *DT;
-    SCEVExpander &Rewriter;
-    SmallVectorImpl<WeakTrackingVH> &DeadInsts;
-
-    bool Changed;
-
-  public:
-    SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
-                   LoopInfo *LI, SCEVExpander &Rewriter,
-                   SmallVectorImpl<WeakTrackingVH> &Dead)
-        : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead),
-          Changed(false) {
-      assert(LI && "IV simplification requires LoopInfo");
-    }
-
-    bool hasChanged() const { return Changed; }
-
-    /// Iteratively perform simplification on a worklist of users of the
-    /// specified induction variable. This is the top-level driver that applies
-    /// all simplifications to users of an IV.
-    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
-
-    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
-
-    bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
-    bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
-
-    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
-    bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
-    bool eliminateTrunc(TruncInst *TI);
-    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
-    bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
-    void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
-    void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
-                             bool IsSigned);
-    void replaceRemWithNumerator(BinaryOperator *Rem);
-    void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
-    void replaceSRemWithURem(BinaryOperator *Rem);
-    bool eliminateSDiv(BinaryOperator *SDiv);
-    bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
-    bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
-  };
-}
-
-/// Fold an IV operand into its use. This removes increments of an
-/// aligned IV when used by an instruction that ignores the low bits.
-///
-/// IVOperand is guaranteed SCEVable, but UseInst may not be.
-///
-/// Return the operand of IVOperand for this induction variable if IVOperand can
-/// be folded (in case more folding opportunities have been exposed).
-/// Otherwise return null.
-Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
-  Value *IVSrc = nullptr;
-  const unsigned OperIdx = 0;
-  const SCEV *FoldedExpr = nullptr;
-  bool MustDropExactFlag = false;
-  switch (UseInst->getOpcode()) {
-  default:
-    return nullptr;
-  case Instruction::UDiv:
-  case Instruction::LShr:
-    // We're only interested in the case where we know something about
-    // the numerator and have a constant denominator.
-    if (IVOperand != UseInst->getOperand(OperIdx) ||
-        !isa<ConstantInt>(UseInst->getOperand(1)))
-      return nullptr;
-
-    // Attempt to fold a binary operator with constant operand.
-    // e.g. ((I + 1) >> 2) => I >> 2
-    if (!isa<BinaryOperator>(IVOperand)
-        || !isa<ConstantInt>(IVOperand->getOperand(1)))
-      return nullptr;
-
-    IVSrc = IVOperand->getOperand(0);
-    // IVSrc must be the (SCEVable) IV, since the other operand is const.
-    assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
-
-    ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
-    if (UseInst->getOpcode() == Instruction::LShr) {
-      // Get a constant for the divisor. See createSCEV.
-      uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
-      if (D->getValue().uge(BitWidth))
-        return nullptr;
-
-      D = ConstantInt::get(UseInst->getContext(),
-                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
-    }
-    FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
-    // We might have 'exact' flag set at this point which will no longer be
-    // correct after we make the replacement.
-    if (UseInst->isExact() &&
-        SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
-      MustDropExactFlag = true;
-  }
-  // We have something that might fold its operand. Compare SCEVs.
-  if (!SE->isSCEVable(UseInst->getType()))
-    return nullptr;
-
-  // Bypass the operand if SCEV can prove it has no effect.
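The "((I + 1) >> 2) => I >> 2" fold above is sound only because the IV is aligned; a standalone numeric check (plain C++; the step and shift are made up):

#include <cstdint>
#include <cstdio>

int main() {
  // For an IV that steps by 4 (so its low two bits are always zero),
  // adding 1 only perturbs bits that ">> 2" throws away.
  for (uint64_t I = 0; I <= 20; I += 4)
    printf("I=%2llu  (I+1)>>2=%llu  I>>2=%llu\n",
           (unsigned long long)I,
           (unsigned long long)((I + 1) >> 2),
           (unsigned long long)(I >> 2));
  return 0;
}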
- if (SE->getSCEV(UseInst) != FoldedExpr) - return nullptr; - - LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand - << " -> " << *UseInst << '\n'); - - UseInst->setOperand(OperIdx, IVSrc); - assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); - - if (MustDropExactFlag) - UseInst->dropPoisonGeneratingFlags(); - - ++NumElimOperand; - Changed = true; - if (IVOperand->use_empty()) - DeadInsts.emplace_back(IVOperand); - return IVSrc; -} - -bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, - Value *IVOperand) { - unsigned IVOperIdx = 0; - ICmpInst::Predicate Pred = ICmp->getPredicate(); - if (IVOperand != ICmp->getOperand(0)) { - // Swapped - assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); - IVOperIdx = 1; - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - // Get the SCEVs for the ICmp operands (in the specific context of the - // current loop) - const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); - const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); - - ICmpInst::Predicate InvariantPredicate; - const SCEV *InvariantLHS, *InvariantRHS; - - auto *PN = dyn_cast<PHINode>(IVOperand); - if (!PN) - return false; - if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, - InvariantLHS, InvariantRHS)) - return false; - - // Rewrite the comparison to a loop invariant comparison if it can be done - // cheaply, where cheaply means "we don't need to emit any new - // instructions". - - SmallDenseMap<const SCEV*, Value*> CheapExpansions; - CheapExpansions[S] = ICmp->getOperand(IVOperIdx); - CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); - - // TODO: Support multiple entry loops? (We currently bail out of these in - // the IndVarSimplify pass) - if (auto *BB = L->getLoopPredecessor()) { - const int Idx = PN->getBasicBlockIndex(BB); - if (Idx >= 0) { - Value *Incoming = PN->getIncomingValue(Idx); - const SCEV *IncomingS = SE->getSCEV(Incoming); - CheapExpansions[IncomingS] = Incoming; - } - } - Value *NewLHS = CheapExpansions[InvariantLHS]; - Value *NewRHS = CheapExpansions[InvariantRHS]; - - if (!NewLHS) - if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS)) - NewLHS = ConstLHS->getValue(); - if (!NewRHS) - if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS)) - NewRHS = ConstRHS->getValue(); - - if (!NewLHS || !NewRHS) - // We could not find an existing value to replace either LHS or RHS. - // Generating new instructions has subtler tradeoffs, so avoid doing that - // for now. - return false; - - LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); - ICmp->setPredicate(InvariantPredicate); - ICmp->setOperand(0, NewLHS); - ICmp->setOperand(1, NewRHS); - return true; -} - -/// SimplifyIVUsers helper for eliminating useless -/// comparisons against an induction variable. 
-void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
-  unsigned IVOperIdx = 0;
-  ICmpInst::Predicate Pred = ICmp->getPredicate();
-  ICmpInst::Predicate OriginalPred = Pred;
-  if (IVOperand != ICmp->getOperand(0)) {
-    // Swapped
-    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
-    IVOperIdx = 1;
-    Pred = ICmpInst::getSwappedPredicate(Pred);
-  }
-
-  // Get the SCEVs for the ICmp operands (in the specific context of the
-  // current loop)
-  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
-  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
-  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
-
-  // If the condition is always true or always false, replace it with
-  // a constant value.
-  if (SE->isKnownPredicate(Pred, S, X)) {
-    ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
-    DeadInsts.emplace_back(ICmp);
-    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
-  } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
-    ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
-    DeadInsts.emplace_back(ICmp);
-    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
-  } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
-    // fallthrough to end of function
-  } else if (ICmpInst::isSigned(OriginalPred) &&
-             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
-    // If we were unable to make anything above, all we can do is to
-    // canonicalize the comparison hoping that it will open the doors for other
-    // optimizations. If we find out that we compare two non-negative values,
-    // we turn the instruction's predicate to its unsigned version. Note that
-    // we cannot rely on Pred here unless we check if we have swapped it.
-    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
-    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
-                      << '\n');
-    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
-  } else
-    return;
-
-  ++NumElimCmp;
-  Changed = true;
-}
-
-bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
-  // Get the SCEVs for the SDiv operands.
-  auto *N = SE->getSCEV(SDiv->getOperand(0));
-  auto *D = SE->getSCEV(SDiv->getOperand(1));
-
-  // Simplify unnecessary loops away.
-  const Loop *L = LI->getLoopFor(SDiv->getParent());
-  N = SE->getSCEVAtScope(N, L);
-  D = SE->getSCEVAtScope(D, L);
-
-  // Replace sdiv by udiv if both of the operands are non-negative
-  if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
-    auto *UDiv = BinaryOperator::Create(
-        BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
-        SDiv->getName() + ".udiv", SDiv);
-    UDiv->setIsExact(SDiv->isExact());
-    SDiv->replaceAllUsesWith(UDiv);
-    LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
-    ++NumSimplifiedSDiv;
-    Changed = true;
-    DeadInsts.push_back(SDiv);
-    return true;
-  }
-
-  return false;
-}
-
-// i %s n -> i %u n if i >= 0 and n >= 0
-void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
-  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
-  auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
-                                      Rem->getName() + ".urem", Rem);
-  Rem->replaceAllUsesWith(URem);
-  LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
-  ++NumSimplifiedSRem;
-  Changed = true;
-  DeadInsts.emplace_back(Rem);
-}
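The remainder rewrites that follow are easy to sanity-check numerically; a minimal standalone sketch (plain C++; the bound N is made up):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t N = 8; // divisor known to exceed the IV's range
  for (uint64_t I = 0; I < N; ++I) {
    // i in [0,n)  =>  i % n == i, so the rem is just the numerator...
    printf("%llu %% %llu = %llu\n", (unsigned long long)I,
           (unsigned long long)N, (unsigned long long)(I % N));
    // ...and for i+1 in [1,n], (i+1) % n == (i+1 == n ? 0 : i+1),
    // which is the icmp+select form replaceRemWithNumeratorOrZero emits.
    uint64_t R = (I + 1 == N) ? 0 : I + 1;
    if (R != (I + 1) % N)
      printf("mismatch!\n"); // never taken
  }
  return 0;
}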
-// i % n --> i if i is in [0,n).
-void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
-  Rem->replaceAllUsesWith(Rem->getOperand(0));
-  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
-  ++NumElimRem;
-  Changed = true;
-  DeadInsts.emplace_back(Rem);
-}
-
-// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
-void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
-  auto *T = Rem->getType();
-  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
-  ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
-  SelectInst *Sel =
-      SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
-  Rem->replaceAllUsesWith(Sel);
-  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
-  ++NumElimRem;
-  Changed = true;
-  DeadInsts.emplace_back(Rem);
-}
-
-/// SimplifyIVUsers helper for eliminating useless remainder operations
-/// operating on an induction variable or replacing srem by urem.
-void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
-                                         bool IsSigned) {
-  auto *NValue = Rem->getOperand(0);
-  auto *DValue = Rem->getOperand(1);
-  // We're only interested in the case where we know something about
-  // the numerator, unless it is a srem, because we want to replace srem by urem
-  // in general.
-  bool UsedAsNumerator = IVOperand == NValue;
-  if (!UsedAsNumerator && !IsSigned)
-    return;
-
-  const SCEV *N = SE->getSCEV(NValue);
-
-  // Simplify unnecessary loops away.
-  const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
-  N = SE->getSCEVAtScope(N, ICmpLoop);
-
-  bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
-
-  // Do not proceed if the Numerator may be negative
-  if (!IsNumeratorNonNegative)
-    return;
-
-  const SCEV *D = SE->getSCEV(DValue);
-  D = SE->getSCEVAtScope(D, ICmpLoop);
-
-  if (UsedAsNumerator) {
-    auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
-    if (SE->isKnownPredicate(LT, N, D)) {
-      replaceRemWithNumerator(Rem);
-      return;
-    }
-
-    auto *T = Rem->getType();
-    const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
-    if (SE->isKnownPredicate(LT, NLessOne, D)) {
-      replaceRemWithNumeratorOrZero(Rem);
-      return;
-    }
-  }
-
-  // Try to replace SRem with URem, if both N and D are known non-negative.
-  // Since we have already checked N, we only need to check D now.
-  if (!IsSigned || !SE->isKnownNonNegative(D))
-    return;
-
-  replaceSRemWithURem(Rem);
-}
-
-static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
-                            bool Signed, const SCEV *LHS, const SCEV *RHS) {
-  const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
-                                            SCEV::NoWrapFlags, unsigned);
-  switch (BinOp) {
-  default:
-    llvm_unreachable("Unsupported binary op");
-  case Instruction::Add:
-    Operation = &ScalarEvolution::getAddExpr;
-    break;
-  case Instruction::Sub:
-    Operation = &ScalarEvolution::getMinusSCEV;
-    break;
-  case Instruction::Mul:
-    Operation = &ScalarEvolution::getMulExpr;
-    break;
-  }
-
-  const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
-      Signed ? 
&ScalarEvolution::getSignExtendExpr - : &ScalarEvolution::getZeroExtendExpr; - - // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) - auto *NarrowTy = cast<IntegerType>(LHS->getType()); - auto *WideTy = - IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); - - const SCEV *A = - (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), - WideTy, 0); - const SCEV *B = - (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0), - (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0); - return A == B; -} - -bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { - const SCEV *LHS = SE->getSCEV(WO->getLHS()); - const SCEV *RHS = SE->getSCEV(WO->getRHS()); - if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) - return false; - - // Proved no overflow, nuke the overflow check and, if possible, the overflow - // intrinsic as well. - - BinaryOperator *NewResult = BinaryOperator::Create( - WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); - - if (WO->isSigned()) - NewResult->setHasNoSignedWrap(true); - else - NewResult->setHasNoUnsignedWrap(true); - - SmallVector<ExtractValueInst *, 4> ToDelete; - - for (auto *U : WO->users()) { - if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { - if (EVI->getIndices()[0] == 1) - EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); - else { - assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); - EVI->replaceAllUsesWith(NewResult); - } - ToDelete.push_back(EVI); - } - } - - for (auto *EVI : ToDelete) - EVI->eraseFromParent(); - - if (WO->use_empty()) - WO->eraseFromParent(); - - return true; -} - -bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { - const SCEV *LHS = SE->getSCEV(SI->getLHS()); - const SCEV *RHS = SE->getSCEV(SI->getRHS()); - if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) - return false; - - BinaryOperator *BO = BinaryOperator::Create( - SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); - if (SI->isSigned()) - BO->setHasNoSignedWrap(); - else - BO->setHasNoUnsignedWrap(); - - SI->replaceAllUsesWith(BO); - DeadInsts.emplace_back(SI); - Changed = true; - return true; -} - -bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { - // It is always legal to replace - // icmp <pred> i32 trunc(iv), n - // with - // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate. - // Or with - // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. - // Or with either of these if pred is an equality predicate. - // - // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for - // every comparison which uses trunc, it means that we can replace each of - // them with comparison of iv against sext/zext(n). We no longer need trunc - // after that. - // - // TODO: Should we do this if we can widen *some* comparisons, but not all - // of them? Sometimes it is enough to enable other optimizations, but the - // trunc instruction will stay in the loop. - Value *IV = TI->getOperand(0); - Type *IVTy = IV->getType(); - const SCEV *IVSCEV = SE->getSCEV(IV); - const SCEV *TISCEV = SE->getSCEV(TI); - - // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). 
If so, we can
-  // get rid of trunc.
-  bool DoesSExtCollapse = false;
-  bool DoesZExtCollapse = false;
-  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
-    DoesSExtCollapse = true;
-  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
-    DoesZExtCollapse = true;
-
-  // If neither sext nor zext collapses, it is not profitable to do any
-  // transform. Bail.
-  if (!DoesSExtCollapse && !DoesZExtCollapse)
-    return false;
-
-  // Collect users of the trunc that look like comparisons against invariants.
-  // Bail if we find something different.
-  SmallVector<ICmpInst *, 4> ICmpUsers;
-  for (auto *U : TI->users()) {
-    // We don't care about users in unreachable blocks.
-    if (isa<Instruction>(U) &&
-        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
-      continue;
-    ICmpInst *ICI = dyn_cast<ICmpInst>(U);
-    if (!ICI) return false;
-    assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
-    if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
-        !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
-      return false;
-    // If we cannot get rid of trunc, bail.
-    if (ICI->isSigned() && !DoesSExtCollapse)
-      return false;
-    if (ICI->isUnsigned() && !DoesZExtCollapse)
-      return false;
-    // For equality, either signed or unsigned works.
-    ICmpUsers.push_back(ICI);
-  }
-
-  auto CanUseZExt = [&](ICmpInst *ICI) {
-    // Unsigned comparison can be widened as unsigned.
-    if (ICI->isUnsigned())
-      return true;
-    // Is it profitable to do zext?
-    if (!DoesZExtCollapse)
-      return false;
-    // For equality, we can safely zext both parts.
-    if (ICI->isEquality())
-      return true;
-    // Otherwise we can only use zext when comparing two non-negative or two
-    // negative values. But in practice, we will never pass DoesZExtCollapse
-    // check for a negative value, because zext(trunc(x)) is non-negative. So
-    // it only makes sense to check for non-negativity here.
-    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
-    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
-    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
-  };
-  // Replace all comparisons against trunc with comparisons against IV.
-  for (auto *ICI : ICmpUsers) {
-    bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
-    auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
-    Instruction *Ext = nullptr;
-    // For signed/unsigned predicate, replace the old comparison with comparison
-    // of immediate IV against sext/zext of the invariant argument. If we can
-    // use either sext or zext (i.e. we are dealing with equality predicate),
-    // then prefer zext as a more canonical form.
-    // TODO: If we see a signed comparison which can be turned into unsigned,
-    // we can do it here for canonicalization purposes.
-    ICmpInst::Predicate Pred = ICI->getPredicate();
-    if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
-    if (CanUseZExt(ICI)) {
-      assert(DoesZExtCollapse && "Unprofitable zext?");
-      Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
-      Pred = ICmpInst::getUnsignedPredicate(Pred);
-    } else {
-      assert(DoesSExtCollapse && "Unprofitable sext?");
-      Ext = new SExtInst(Op1, IVTy, "sext", ICI);
-      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
-    }
-    bool Changed;
-    L->makeLoopInvariant(Ext, Changed);
-    (void)Changed;
-    ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
-    ICI->replaceAllUsesWith(NewICI);
-    DeadInsts.emplace_back(ICI);
-  }
-
-  // Trunc no longer needed.
- TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - DeadInsts.emplace_back(TI); - return true; -} - -/// Eliminate an operation that consumes a simple IV and has no observable -/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, -/// but UseInst may not be. -bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, - Instruction *IVOperand) { - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - eliminateIVComparison(ICmp, IVOperand); - return true; - } - if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSRem = Bin->getOpcode() == Instruction::SRem; - if (IsSRem || Bin->getOpcode() == Instruction::URem) { - simplifyIVRemainder(Bin, IVOperand, IsSRem); - return true; - } - - if (Bin->getOpcode() == Instruction::SDiv) - return eliminateSDiv(Bin); - } - - if (auto *WO = dyn_cast<WithOverflowInst>(UseInst)) - if (eliminateOverflowIntrinsic(WO)) - return true; - - if (auto *SI = dyn_cast<SaturatingInst>(UseInst)) - if (eliminateSaturatingIntrinsic(SI)) - return true; - - if (auto *TI = dyn_cast<TruncInst>(UseInst)) - if (eliminateTrunc(TI)) - return true; - - if (eliminateIdentitySCEV(UseInst, IVOperand)) - return true; - - return false; -} - -static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { - if (auto *BB = L->getLoopPreheader()) - return BB->getTerminator(); - - return Hint; -} - -/// Replace the UseInst with a constant if possible. -bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { - if (!SE->isSCEVable(I->getType())) - return false; - - // Get the symbolic expression for this instruction. - const SCEV *S = SE->getSCEV(I); - - if (!SE->isLoopInvariant(S, L)) - return false; - - // Do not generate something ridiculous even if S is loop invariant. - if (Rewriter.isHighCostExpansion(S, L, I)) - return false; - - auto *IP = GetLoopInvariantInsertPosition(L, I); - auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); - - I->replaceAllUsesWith(Invariant); - LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I - << " with loop invariant: " << *S << '\n'); - ++NumFoldedUser; - Changed = true; - DeadInsts.emplace_back(I); - return true; -} - -/// Eliminate any operation that SCEV can prove is an identity function. -bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, - Instruction *IVOperand) { - if (!SE->isSCEVable(UseInst->getType()) || - (UseInst->getType() != IVOperand->getType()) || - (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) - return false; - - // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the - // dominator tree, even if X is an operand to Y. For instance, in - // - // %iv = phi i32 {0,+,1} - // br %cond, label %left, label %merge - // - // left: - // %X = add i32 %iv, 0 - // br label %merge - // - // merge: - // %M = phi (%X, %iv) - // - // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and - // %M.replaceAllUsesWith(%X) would be incorrect. - - if (isa<PHINode>(UseInst)) - // If UseInst is not a PHI node then we know that IVOperand dominates - // UseInst directly from the legality of SSA. 
- if (!DT || !DT->dominates(IVOperand, UseInst)) - return false; - - if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) - return false; - - LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); - - UseInst->replaceAllUsesWith(IVOperand); - ++NumElimIdentity; - Changed = true; - DeadInsts.emplace_back(UseInst); - return true; -} - -/// Annotate BO with nsw / nuw if it provably does not signed-overflow / -/// unsigned-overflow. Returns true if anything changed, false otherwise. -bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, - Value *IVOperand) { - // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. - if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) - return false; - - if (BO->getOpcode() != Instruction::Add && - BO->getOpcode() != Instruction::Sub && - BO->getOpcode() != Instruction::Mul) - return false; - - const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); - const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - bool Changed = false; - - if (!BO->hasNoUnsignedWrap() && - willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) { - BO->setHasNoUnsignedWrap(); - SE->forgetValue(BO); - Changed = true; - } - - if (!BO->hasNoSignedWrap() && - willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) { - BO->setHasNoSignedWrap(); - SE->forgetValue(BO); - Changed = true; - } - - return Changed; -} - -/// Annotate the Shr in (X << IVOperand) >> C as exact using the -/// information from the IV's range. Returns true if anything changed, false -/// otherwise. -bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, - Value *IVOperand) { - using namespace llvm::PatternMatch; - - if (BO->getOpcode() == Instruction::Shl) { - bool Changed = false; - ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); - for (auto *U : BO->users()) { - const APInt *C; - if (match(U, - m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || - match(U, - m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { - BinaryOperator *Shr = cast<BinaryOperator>(U); - if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { - Shr->setIsExact(true); - Changed = true; - } - } - } - return Changed; - } - - return false; -} - -/// Add all uses of Def to the current IV's worklist. -static void pushIVUsers( - Instruction *Def, Loop *L, - SmallPtrSet<Instruction*,16> &Simplified, - SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { - - for (User *U : Def->users()) { - Instruction *UI = cast<Instruction>(U); - - // Avoid infinite or exponential worklist processing. - // Also ensure unique worklist users. - // If Def is a LoopPhi, it may not be in the Simplified set, so check for - // self edges first. - if (UI == Def) - continue; - - // Only change the current Loop, do not change the other parts (e.g. other - // Loops). - if (!L->contains(UI)) - continue; - - // Do not push the same instruction more than once. - if (!Simplified.insert(UI).second) - continue; - - SimpleIVUsers.push_back(std::make_pair(UI, Def)); - } -} - -/// Return true if this instruction generates a simple SCEV -/// expression in terms of that IV. -/// -/// This is similar to IVUsers' isInteresting() but processes each instruction -/// non-recursively when the operand is already known to be a simpleIVUser. -/// -static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { - if (!SE->isSCEVable(I->getType())) - return false; - - // Get the symbolic expression for this instruction. 
-  const SCEV *S = SE->getSCEV(I);
-
-  // Only consider affine recurrences.
-  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
-  if (AR && AR->getLoop() == L)
-    return true;
-
-  return false;
-}
-
-/// Iteratively perform simplification on a worklist of users
-/// of the specified induction variable. Each successive simplification may push
-/// more users which may themselves be candidates for simplification.
-///
-/// This algorithm does not require IVUsers analysis. Instead, it simplifies
-/// instructions in-place during analysis. Rather than rewriting induction
-/// variables bottom-up from their users, it transforms a chain of IVUsers
-/// top-down, updating the IR only when it encounters a clear optimization
-/// opportunity.
-///
-/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
-///
-void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
-  if (!SE->isSCEVable(CurrIV->getType()))
-    return;
-
-  // Instructions processed by SimplifyIndvar for CurrIV.
-  SmallPtrSet<Instruction*,16> Simplified;
-
-  // Use-def pairs of IV users waiting to be processed for CurrIV.
-  SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
-
-  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
-  // called multiple times for the same LoopPhi. This is the proper thing to
-  // do for loop header phis that use each other.
-  pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
-
-  while (!SimpleIVUsers.empty()) {
-    std::pair<Instruction*, Instruction*> UseOper =
-        SimpleIVUsers.pop_back_val();
-    Instruction *UseInst = UseOper.first;
-
-    // If a user of the IndVar is trivially dead, we prefer just to mark it dead
-    // rather than try to do some complex analysis or transformation (such as
-    // widening) based on it.
-    // TODO: Propagate TLI and pass it here to handle more cases.
-    if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
-      DeadInsts.emplace_back(UseInst);
-      continue;
-    }
-
-    // Bypass back edges to avoid extra work.
-    if (UseInst == CurrIV) continue;
-
-    // Try to replace UseInst with a loop invariant before any other
-    // simplifications.
-    if (replaceIVUserWithLoopInvariant(UseInst))
-      continue;
-
-    Instruction *IVOperand = UseOper.second;
-    for (unsigned N = 0; IVOperand; ++N) {
-      assert(N <= Simplified.size() && "runaway iteration");
-
-      Value *NewOper = foldIVUser(UseInst, IVOperand);
-      if (!NewOper)
-        break; // done folding
-      IVOperand = dyn_cast<Instruction>(NewOper);
-    }
-    if (!IVOperand)
-      continue;
-
-    if (eliminateIVUser(UseInst, IVOperand)) {
-      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
-      continue;
-    }
-
-    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
-      if ((isa<OverflowingBinaryOperator>(BO) &&
-           strengthenOverflowingOperation(BO, IVOperand)) ||
-          (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
-        // re-queue uses of the now modified binary operator and fall
-        // through to the checks that remain.
-        pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
-      }
-    }
-
-    CastInst *Cast = dyn_cast<CastInst>(UseInst);
-    if (V && Cast) {
-      V->visitCast(Cast);
-      continue;
-    }
-    if (isSimpleIVUser(UseInst, L, SE)) {
-      pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
-    }
-  }
-}
-
-namespace llvm {
-
-void IVVisitor::anchor() { }
-
-/// Simplify instructions that use this induction variable
-/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
-                       LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
-                       SCEVExpander &Rewriter, IVVisitor *V) {
-  SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter,
-                     Dead);
-  SIV.simplifyUsers(CurrIV, V);
-  return SIV.hasChanged();
-}
-
-/// Simplify users of induction variables within this
-/// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
-                     LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
-  SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
-#ifndef NDEBUG
-  Rewriter.setDebugType(DEBUG_TYPE);
-#endif
-  bool Changed = false;
-  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
-    Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter);
-  }
-  return Changed;
-}
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
deleted file mode 100644
index e938ae6cb42f..000000000000
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ /dev/null
@@ -1,3159 +0,0 @@
-//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the library calls simplifier. It does not implement
-// any pass, but can be used by other passes to do simplifications.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/SizeOpts.h"
-
-using namespace llvm;
-using namespace PatternMatch;
-
-static cl::opt<bool>
-    EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
-                         cl::init(false),
-                         cl::desc("Enable unsafe double to float "
-                                  "shrinking for math lib calls"));
-
-
-//===----------------------------------------------------------------------===//
-// Helper Functions
-//===----------------------------------------------------------------------===//
-
-static bool ignoreCallingConv(LibFunc Func) {
-  return Func == LibFunc_abs || Func == LibFunc_labs ||
-         Func == LibFunc_llabs || Func == LibFunc_strlen;
-}
-
-static bool isCallingConvCCompatible(CallInst *CI) {
-  switch(CI->getCallingConv()) {
-  default:
-    return false;
-  case 
llvm::CallingConv::C: - return true; - case llvm::CallingConv::ARM_APCS: - case llvm::CallingConv::ARM_AAPCS: - case llvm::CallingConv::ARM_AAPCS_VFP: { - - // The iOS ABI diverges from the standard in some cases, so for now don't - // try to simplify those calls. - if (Triple(CI->getModule()->getTargetTriple()).isiOS()) - return false; - - auto *FuncTy = CI->getFunctionType(); - - if (!FuncTy->getReturnType()->isPointerTy() && - !FuncTy->getReturnType()->isIntegerTy() && - !FuncTy->getReturnType()->isVoidTy()) - return false; - - for (auto Param : FuncTy->params()) { - if (!Param->isPointerTy() && !Param->isIntegerTy()) - return false; - } - return true; - } - } - return false; -} - -/// Return true if it is only used in equality comparisons with With. -static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (IC->isEquality() && IC->getOperand(1) == With) - continue; - // Unknown instruction. - return false; - } - return true; -} - -static bool callHasFloatingPointArgument(const CallInst *CI) { - return any_of(CI->operands(), [](const Use &OI) { - return OI->getType()->isFloatingPointTy(); - }); -} - -static bool callHasFP128Argument(const CallInst *CI) { - return any_of(CI->operands(), [](const Use &OI) { - return OI->getType()->isFP128Ty(); - }); -} - -static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { - if (Base < 2 || Base > 36) - // handle special zero base - if (Base != 0) - return nullptr; - - char *End; - std::string nptr = Str.str(); - errno = 0; - long long int Result = strtoll(nptr.c_str(), &End, Base); - if (errno) - return nullptr; - - // if we assume all possible target locales are ASCII supersets, - // then if strtoll successfully parses a number on the host, - // it will also successfully parse the same way on the target - if (*End != '\0') - return nullptr; - - if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result)) - return nullptr; - - return ConstantInt::get(CI->getType(), Result); -} - -static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - CallInst *FOpen = dyn_cast<CallInst>(File); - if (!FOpen) - return false; - - Function *InnerCallee = FOpen->getCalledFunction(); - if (!InnerCallee) - return false; - - LibFunc Func; - if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) || - Func != LibFunc_fopen) - return false; - - inferLibFuncAttributes(*CI->getCalledFunction(), *TLI); - if (PointerMayBeCaptured(File, true, true)) - return false; - - return true; -} - -static bool isOnlyUsedInComparisonWithZero(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. 
- return false; - } - return true; -} - -static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, - const DataLayout &DL) { - if (!isOnlyUsedInComparisonWithZero(CI)) - return false; - - if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL)) - return false; - - if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) - return false; - - return true; -} - -//===----------------------------------------------------------------------===// -// String and Memory Library Call Optimizations -//===----------------------------------------------------------------------===// - -Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { - // Extract some information from the instruction - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; - --Len; // Unbias length. - - // Handle the simple, do-nothing case: strcat(x, "") -> x - if (Len == 0) - return Dst; - - return emitStrLenMemCpy(Src, Dst, Len, B); -} - -Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, - IRBuilder<> &B) { - // We need to find the end of the destination string. That's where the - // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = emitStrLen(Dst, B, DL, TLI); - if (!DstLen) - return nullptr; - - // Now that we have the destination's length, we must index into the - // destination's pointer to get the actual memcpy destination (end of - // the string .. we're concatenating). - Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); - - // We have enough information to now generate the memcpy call to do the - // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(CpyDst, 1, Src, 1, - ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); - return Dst; -} - -Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { - // Extract some information from the instruction. - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - uint64_t Len; - - // We don't do anything if length is not constant. - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) - Len = LengthArg->getZExtValue(); - else - return nullptr; - - // See if we can get the length of the input string. - uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) - return nullptr; - --SrcLen; // Unbias length. - - // Handle the simple, do-nothing cases: - // strncat(x, "", c) -> x - // strncat(x, c, 0) -> x - if (SrcLen == 0 || Len == 0) - return Dst; - - // We don't optimize this case. - if (Len < SrcLen) - return nullptr; - - // strncat(x, s, c) -> strcat(x, s) - // s is constant so the strcat can be optimized further. - return emitStrLenMemCpy(Src, Dst, SrcLen, B); -} - -Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - Value *SrcStr = CI->getArgOperand(0); - - // If the second operand is non-constant, see if we can compute the length - // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!CharC) { - uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. - return nullptr; - - return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. 
- ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), - B, DL, TLI); - } - - // Otherwise, the character is a constant, see if the first argument is - // a string literal. If so, we can constant fold. - StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) { - if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) - return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI), - "strchr"); - return nullptr; - } - - // Compute the offset, make sure to handle the case when we're searching for - // zero (a weird way to spell strlen). - size_t I = (0xFF & CharC->getSExtValue()) == 0 - ? Str.size() - : Str.find(CharC->getSExtValue()); - if (I == StringRef::npos) // Didn't find the char. strchr returns null. - return Constant::getNullValue(CI->getType()); - - // strchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); -} - -Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { - Value *SrcStr = CI->getArgOperand(0); - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - - // Cannot fold anything if we're not looking for a constant. - if (!CharC) - return nullptr; - - StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) { - // strrchr(s, 0) -> strchr(s, 0) - if (CharC->isZero()) - return emitStrChr(SrcStr, '\0', B, TLI); - return nullptr; - } - - // Compute the offset. - size_t I = (0xFF & CharC->getSExtValue()) == 0 - ? Str.size() - : Str.rfind(CharC->getSExtValue()); - if (I == StringRef::npos) // Didn't find the char. Return null. - return Constant::getNullValue(CI->getType()); - - // strrchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr"); -} - -Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - if (Str1P == Str2P) // strcmp(x,x) -> 0 - return ConstantInt::get(CI->getType(), 0); - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - // strcmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), Str1.compare(Str2)); - - if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x - return B.CreateNeg(B.CreateZExt( - B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); - - if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), - CI->getType()); - - // strcmp(P, "x") -> memcmp(P, "x", 2) - uint64_t Len1 = GetStringLength(Str1P); - uint64_t Len2 = GetStringLength(Str2P); - if (Len1 && Len2) { - return emitMemCmp(Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - std::min(Len1, Len2)), - B, DL, TLI); - } - - // strcmp to memcmp - if (!HasStr1 && HasStr2) { - if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); - } else if (HasStr1 && !HasStr2) { - if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - if (Str1P == Str2P) // strncmp(x,x,n) -> 0 - return ConstantInt::get(CI->getType(), 0); - - // Get the length argument if it is constant. 
- uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) - Length = LengthArg->getZExtValue(); - else - return nullptr; - - if (Length == 0) // strncmp(x,y,0) -> 0 - return ConstantInt::get(CI->getType(), 0); - - if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - // strncmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) { - StringRef SubStr1 = Str1.substr(0, Length); - StringRef SubStr2 = Str2.substr(0, Length); - return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); - } - - if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x - return B.CreateNeg(B.CreateZExt( - B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); - - if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x - return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), - CI->getType()); - - uint64_t Len1 = GetStringLength(Str1P); - uint64_t Len2 = GetStringLength(Str2P); - - // strncmp to memcmp - if (!HasStr1 && HasStr2) { - Len2 = std::min(Len2, Length); - if (canTransformToMemCmp(CI, Str1P, Len2, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL, - TLI); - } else if (HasStr1 && !HasStr2) { - Len1 = std::min(Len1, Length); - if (canTransformToMemCmp(CI, Str2P, Len1, DL)) - return emitMemCmp( - Str1P, Str2P, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL, - TLI); - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // strcpy(x,x) -> x - return Src; - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; - - // We have enough information to now generate the memcpy call to do the - // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, 1, Src, 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); - return Dst; -} - -Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = emitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; - } - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; - - Type *PT = Callee->getFunctionType()->getParamType(0); - Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); - Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, - ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); - - // We have enough information to now generate the memcpy call to do the - // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, 1, Src, 1, LenV); - return DstEnd; -} - -Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - Value *LenOp = CI->getArgOperand(2); - - // See if we can get the length of the input string. 
-  uint64_t SrcLen = GetStringLength(Src);
-  if (SrcLen == 0)
-    return nullptr;
-  --SrcLen;
-
-  if (SrcLen == 0) {
-    // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
-    B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
-    return Dst;
-  }
-
-  uint64_t Len;
-  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
-    Len = LengthArg->getZExtValue();
-  else
-    return nullptr;
-
-  if (Len == 0)
-    return Dst; // strncpy(x, y, 0) -> x
-
-  // Let strncpy handle the zero padding
-  if (Len > SrcLen + 1)
-    return nullptr;
-
-  Type *PT = Callee->getFunctionType()->getParamType(0);
-  // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
-  B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
-
-  return Dst;
-}
-
-Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
-                                               unsigned CharSize) {
-  Value *Src = CI->getArgOperand(0);
-
-  // Constant folding: strlen("xyz") -> 3
-  if (uint64_t Len = GetStringLength(Src, CharSize))
-    return ConstantInt::get(CI->getType(), Len - 1);
-
-  // If s is a constant pointer pointing to a string literal, we can fold
-  // strlen(s + x) to strlen(s) - x, when x is known to be in the range
-  // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
-  // We only try to simplify strlen when the pointer s points to an array
-  // of i8. Otherwise, we would need to scale the offset x before doing the
-  // subtraction. This will make the optimization more complex, and it's not
-  // very useful because calling strlen for a pointer of other types is
-  // very uncommon.
-  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
-    if (!isGEPBasedOnPointerToString(GEP, CharSize))
-      return nullptr;
-
-    ConstantDataArraySlice Slice;
-    if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
-      uint64_t NullTermIdx;
-      if (Slice.Array == nullptr) {
-        NullTermIdx = 0;
-      } else {
-        NullTermIdx = ~((uint64_t)0);
-        for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
-          if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
-            NullTermIdx = I;
-            break;
-          }
-        }
-        // If the string does not have '\0', leave it to strlen to compute
-        // its length.
-        if (NullTermIdx == ~((uint64_t)0))
-          return nullptr;
-      }
-
-      Value *Offset = GEP->getOperand(2);
-      KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
-      Known.Zero.flipAllBits();
-      uint64_t ArrSize =
-          cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
-
-      // KnownZero's bits are flipped, so zeros in KnownZero now represent
-      // bits known to be zeros in Offset, and ones in KnownZero represent
-      // bits unknown in Offset. Therefore, Offset is known to be in range
-      // [0, NullTermIdx] when the flipped KnownZero is non-negative and
-      // unsigned-less-than NullTermIdx.
-      //
-      // If Offset is not provably in the range [0, NullTermIdx], we can still
-      // optimize if we can prove that the program has undefined behavior when
-      // Offset is outside that range. That is the case when GEP->getOperand(0)
-      // is a pointer to an object whose memory extent is NullTermIdx+1.
-      if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
-          (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
-           NullTermIdx == ArrSize - 1)) {
-        Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
-        return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
-                           Offset);
-      }
-    }
-
-    return nullptr;
-  }
-
-  // strlen(x?"foo":"bars") --> x ? 
3 : 4 - if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { - uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); - uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); - if (LenTrue && LenFalse) { - ORE.emit([&]() { - return OptimizationRemark("instcombine", "simplify-libcalls", CI) - << "folded strlen(select) to select of constants"; - }); - return B.CreateSelect(SI->getCondition(), - ConstantInt::get(CI->getType(), LenTrue - 1), - ConstantInt::get(CI->getType(), LenFalse - 1)); - } - } - - // strlen(x) != 0 --> *x != 0 - // strlen(x) == 0 --> *x == 0 - if (isOnlyUsedInZeroEqualityComparison(CI)) - return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"), - CI->getType()); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { - return optimizeStringLength(CI, B, 8); -} - -Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { - Module &M = *CI->getModule(); - unsigned WCharSize = TLI->getWCharSize(M) * 8; - // We cannot perform this optimization without wchar_size metadata. - if (WCharSize == 0) - return nullptr; - - return optimizeStringLength(CI, B, WCharSize); -} - -Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strpbrk(s, "") -> nullptr - // strpbrk("", s) -> nullptr - if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t I = S1.find_first_of(S2); - if (I == StringRef::npos) // No match. - return Constant::getNullValue(CI->getType()); - - return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), - "strpbrk"); - } - - // strpbrk(s, "a") -> strchr(s, 'a') - if (HasS2 && S2.size() == 1) - return emitStrChr(CI->getArgOperand(0), S2[0], B, TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { - Value *EndPtr = CI->getArgOperand(1); - if (isa<ConstantPointerNull>(EndPtr)) { - // With a null EndPtr, this function won't capture the main argument. - // It would be readonly too, except that it still may write to errno. - CI->addParamAttr(0, Attribute::NoCapture); - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) { - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strspn(s, "") -> 0 - // strspn("", s) -> 0 - if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t Pos = S1.find_first_not_of(S2); - if (Pos == StringRef::npos) - Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strcspn("", s) -> 0 - if (HasS1 && S1.empty()) - return Constant::getNullValue(CI->getType()); - - // Constant folding. 
- if (HasS1 && HasS2) { - size_t Pos = S1.find_first_of(S2); - if (Pos == StringRef::npos) - Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - // strcspn(s, "") -> strlen(s) - if (HasS2 && S2.empty()) - return emitStrLen(CI->getArgOperand(0), B, DL, TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { - // fold strstr(x, x) -> x. - if (CI->getArgOperand(0) == CI->getArgOperand(1)) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = emitStrLen(CI->getArgOperand(1), B, DL, TLI); - if (!StrLen) - return nullptr; - Value *StrNCmp = emitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, DL, TLI); - if (!StrNCmp) - return nullptr; - for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { - ICmpInst *Old = cast<ICmpInst>(*UI++); - Value *Cmp = - B.CreateICmp(Old->getPredicate(), StrNCmp, - ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); - replaceAllUsesWith(Old, Cmp); - } - return CI; - } - - // See if either input string is a constant string. - StringRef SearchStr, ToFindStr; - bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); - bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); - - // fold strstr(x, "") -> x. - if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // If both strings are known, constant fold it. - if (HasStr1 && HasStr2) { - size_t Offset = SearchStr.find(ToFindStr); - - if (Offset == StringRef::npos) // strstr("foo", "bar") -> null - return Constant::getNullValue(CI->getType()); - - // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = castToCStr(CI->getArgOperand(0), B); - Result = - B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); - } - - // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { - Value *SrcStr = CI->getArgOperand(0); - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - - // memchr(x, y, 0) -> null - if (LenC && LenC->isZero()) - return Constant::getNullValue(CI->getType()); - - // From now on we need at least constant length and string. - StringRef Str; - if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) - return nullptr; - - // Truncate the string to LenC. If Str is smaller than LenC we will still only - // scan the string, as reading past the end of it is undefined and we can just - // return null if we don't find the char. - Str = Str.substr(0, LenC->getZExtValue()); - - // If the char is variable but the input str and length are not we can turn - // this memchr call into a simple bit field test. Of course this only works - // when the return value is only checked against null. - // - // It would be really nice to reuse switch lowering here but we can't change - // the CFG at this point. - // - // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) - // != 0 - // after bounds check. 
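// --- Illustrative sketch (names assumed for the example; not from this file).
// What the bitfield rewrite computes, as standalone C++. The 64-bit width is
// an assumption here; the real code sizes the field from the largest byte in
// the string:
//
//   #include <cstdint>
//   static bool memchrBitfield(unsigned char C) {
//     const unsigned Width = 64;                   // fits in one register
//     const uint64_t Bits = (uint64_t(1) << '\r')  // one bit per byte value
//                         | (uint64_t(1) << '\n'); //   occurring in "\r\n"
//     return C < Width && ((uint64_t(1) << C) & Bits) != 0;
//   }
//
// memchr("\r\n", C, 2) != nullptr then becomes a bounds check plus one shift
// and one AND, with no loop over the string.
// ----------------------------------------------------------------------------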
- if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { - unsigned char Max = - *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), - reinterpret_cast<const unsigned char *>(Str.end())); - - // Make sure the bit field we're about to create fits in a register on the - // target. - // FIXME: On a 64 bit architecture this prevents us from using the - // interesting range of alpha ascii chars. We could do better by emitting - // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; - - // For the bit field use a power-of-2 type with at least 8 bits to avoid - // creating unnecessary illegal types. - unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); - - // Now build the bit field. - APInt Bitfield(Width, 0); - for (char C : Str) - Bitfield.setBit((unsigned char)C); - Value *BitfieldC = B.getInt(Bitfield); - - // Adjust width of "C" to the bitfield width, then mask off the high bits. - Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); - C = B.CreateAnd(C, B.getIntN(Width, 0xFF)); - - // First check that the bit field access is within bounds. - Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), - "memchr.bounds"); - - // Create code that checks if the given bit is set in the field. - Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); - Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); - - // Finally merge both checks and cast to pointer type. The inttoptr - // implicitly zexts the i1 to intptr type. - return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); - } - - // Check if all arguments are constants. If so, we can constant fold. - if (!CharC) - return nullptr; - - // Compute the offset. - size_t I = Str.find(CharC->getSExtValue() & 0xFF); - if (I == StringRef::npos) // Didn't find the char. memchr returns null. - return Constant::getNullValue(CI->getType()); - - // memchr(s+n,c,l) -> gep(s+n+i,c) - return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); -} - -static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, - uint64_t Len, IRBuilder<> &B, - const DataLayout &DL) { - if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Constant::getNullValue(CI->getType()); - - // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS - if (Len == 1) { - Value *LHSV = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); - return B.CreateSub(LHSV, RHSV, "chardiff"); - } - - // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 - // TODO: The case where both inputs are constants does not need to be limited - // to legal integers or equality comparison. See block below this. - if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { - IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); - unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); - - // First, see if we can fold either argument to a constant. 
- Value *LHSV = nullptr; - if (auto *LHSC = dyn_cast<Constant>(LHS)) { - LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); - LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); - } - Value *RHSV = nullptr; - if (auto *RHSC = dyn_cast<Constant>(RHS)) { - RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); - RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); - } - - // Don't generate unaligned loads. If either source is constant data, - // alignment doesn't matter for that source because there is no load. - if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && - (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { - if (!LHSV) { - Type *LHSPtrTy = - IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); - } - if (!RHSV) { - Type *RHSPtrTy = - IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); - } - return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); - } - } - - // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const). - // TODO: This is limited to i8 arrays. - StringRef LHSStr, RHSStr; - if (getConstantStringInfo(LHS, LHSStr) && - getConstantStringInfo(RHS, RHSStr)) { - // Make sure we're not reading out-of-bounds memory. - if (Len > LHSStr.size() || Len > RHSStr.size()) - return nullptr; - // Fold the memcmp and normalize the result. This way we get consistent - // results across multiple platforms. - uint64_t Ret = 0; - int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); - if (Cmp < 0) - Ret = -1; - else if (Cmp > 0) - Ret = 1; - return ConstantInt::get(CI->getType(), Ret); - } - return nullptr; -} - -// Most simplifications for memcmp also apply to bcmp. -Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, - IRBuilder<> &B) { - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - Value *Size = CI->getArgOperand(2); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Handle constant lengths. - if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) - if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS, - LenC->getZExtValue(), B, DL)) - return Res; - - return nullptr; -} - -Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { - if (Value *V = optimizeMemCmpBCmpCommon(CI, B)) - return V; - - // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 - // `bcmp` can be more efficient than memcmp because it only has to know that - // there is a difference, not where it is. 
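// --- Minimal sketch (helper name assumed; not from this file). Why the bcmp
// rewrite below is sound: both calls return zero iff the buffers are
// byte-identical, and only memcmp's nonzero sign carries ordering information,
// which a zero-equality caller never observes.
//
//   #include <cstring>
//   static bool sameBytes(const void *A, const void *B, size_t N) {
//     return memcmp(A, B, N) == 0;  // may legally be emitted as bcmp(...) == 0
//   }
// ----------------------------------------------------------------------------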
- if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) { - Value *LHS = CI->getArgOperand(0); - Value *RHS = CI->getArgOperand(1); - Value *Size = CI->getArgOperand(2); - return emitBCmp(LHS, RHS, Size, B, DL, TLI); - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) { - return optimizeMemCmpBCmpCommon(CI, B); -} - -Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { - // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); - return CI->getArgOperand(0); -} - -Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { - // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); - return CI->getArgOperand(0); -} - -/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n). -Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) { - // This has to be a memset of zeros (bzero). - auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1)); - if (!FillValue || FillValue->getZExtValue() != 0) - return nullptr; - - // TODO: We should handle the case where the malloc has more than one use. - // This is necessary to optimize common patterns such as when the result of - // the malloc is checked against null or when a memset intrinsic is used in - // place of a memset library call. - auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0)); - if (!Malloc || !Malloc->hasOneUse()) - return nullptr; - - // Is the inner call really malloc()? - Function *InnerCallee = Malloc->getCalledFunction(); - if (!InnerCallee) - return nullptr; - - LibFunc Func; - if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) || - Func != LibFunc_malloc) - return nullptr; - - // The memset must cover the same number of bytes that are malloc'd. - if (Memset->getArgOperand(2) != Malloc->getArgOperand(0)) - return nullptr; - - // Replace the malloc with a calloc. We need the data layout to know what the - // actual size of a 'size_t' parameter is. 
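// --- Minimal sketch (function names assumed; not from this file). The matched
// source-level pattern and its replacement; malloc's result must feed the
// memset directly (its single use), with the same size in both calls:
//
//   #include <cstdlib>
//   #include <cstring>
//   void *before(size_t N) { return memset(malloc(N), 0, N); } // as matched;
//                                            // malloc failure is not checked
//   void *after(size_t N) { return calloc(1, N); }
//
// calloc can often hand back already-zeroed pages without touching the memory.
// ----------------------------------------------------------------------------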
- B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); - const DataLayout &DL = Malloc->getModule()->getDataLayout(); - IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); - Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), - Malloc->getArgOperand(0), Malloc->getAttributes(), - B, *TLI); - if (!Calloc) - return nullptr; - - Malloc->replaceAllUsesWith(Calloc); - eraseFromParent(Malloc); - - return Calloc; -} - -Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { - if (auto *Calloc = foldMallocMemset(CI, B)) - return Calloc; - - // memset(p, v, n) -> llvm.memset(align 1 p, v, n) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); -} - -Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) { - if (isa<ConstantPointerNull>(CI->getArgOperand(0))) - return emitMalloc(CI->getArgOperand(1), B, DL, TLI); - - return nullptr; -} - -//===----------------------------------------------------------------------===// -// Math Library Optimizations -//===----------------------------------------------------------------------===// - -// Replace a libcall \p CI with a call to intrinsic \p IID -static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) { - // Propagate fast-math flags from the existing call to the new call. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - Module *M = CI->getModule(); - Value *V = CI->getArgOperand(0); - Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); - CallInst *NewCall = B.CreateCall(F, V); - NewCall->takeName(CI); - return NewCall; -} - -/// Return a variant of Val with float type. -/// Currently this works in two cases: If Val is an FPExtension of a float -/// value to something bigger, simply return the operand. -/// If Val is a ConstantFP but can be converted to a float ConstantFP without -/// loss of precision do so. -static Value *valueHasFloatPrecision(Value *Val) { - if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) { - Value *Op = Cast->getOperand(0); - if (Op->getType()->isFloatTy()) - return Op; - } - if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) { - APFloat F = Const->getValueAPF(); - bool losesInfo; - (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, - &losesInfo); - if (!losesInfo) - return ConstantFP::get(Const->getContext(), F); - } - return nullptr; -} - -/// Shrink double -> float functions. -static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, - bool isBinary, bool isPrecise = false) { - Function *CalleeFn = CI->getCalledFunction(); - if (!CI->getType()->isDoubleTy() || !CalleeFn) - return nullptr; - - // If not all the uses of the function are converted to float, then bail out. - // This matters if the precision of the result is more important than the - // precision of the arguments. - if (isPrecise) - for (User *U : CI->users()) { - FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); - if (!Cast || !Cast->getType()->isFloatTy()) - return nullptr; - } - - // If this is something like 'g((double) float)', convert to 'gf(float)'. - Value *V[2]; - V[0] = valueHasFloatPrecision(CI->getArgOperand(0)); - V[1] = isBinary ? 
valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr; - if (!V[0] || (isBinary && !V[1])) - return nullptr; - - StringRef CalleeNm = CalleeFn->getName(); - AttributeList CalleeAt = CalleeFn->getAttributes(); - bool CalleeIn = CalleeFn->isIntrinsic(); - - // If call isn't an intrinsic, check that it isn't within a function with the - // same name as the float version of this call, otherwise the result is an - // infinite loop. For example, from MinGW-w64: - // - // float expf(float val) { return (float) exp((double) val); } - if (!CalleeIn) { - const Function *Fn = CI->getFunction(); - StringRef FnName = Fn->getName(); - if (FnName.back() == 'f' && - FnName.size() == (CalleeNm.size() + 1) && - FnName.startswith(CalleeNm)) - return nullptr; - } - - // Propagate the math semantics from the current function to the new function. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // g((double) float) -> (double) gf(float) - Value *R; - if (CalleeIn) { - Module *M = CI->getModule(); - Intrinsic::ID IID = CalleeFn->getIntrinsicID(); - Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); - R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); - } - else - R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt) - : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt); - - return B.CreateFPExt(R, B.getDoubleTy()); -} - -/// Shrink double -> float for unary functions. -static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, - bool isPrecise = false) { - return optimizeDoubleFP(CI, B, false, isPrecise); -} - -/// Shrink double -> float for binary functions. -static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B, - bool isPrecise = false) { - return optimizeDoubleFP(CI, B, true, isPrecise); -} - -// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) -Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) { - if (!CI->isFast()) - return nullptr; - - // Propagate fast-math flags from the existing call to new instructions. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - Value *Real, *Imag; - if (CI->getNumArgOperands() == 1) { - Value *Op = CI->getArgOperand(0); - assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!"); - Real = B.CreateExtractValue(Op, 0, "real"); - Imag = B.CreateExtractValue(Op, 1, "imag"); - } else { - assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!"); - Real = CI->getArgOperand(0); - Imag = CI->getArgOperand(1); - } - - Value *RealReal = B.CreateFMul(Real, Real); - Value *ImagImag = B.CreateFMul(Imag, Imag); - - Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, - CI->getType()); - return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); -} - -static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, - IRBuilder<> &B) { - if (!isa<FPMathOperator>(Call)) - return nullptr; - - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Call->getFastMathFlags()); - - // TODO: Can this be shared to also handle LLVM intrinsics? 
- Value *X; - switch (Func) { - case LibFunc_sin: - case LibFunc_sinf: - case LibFunc_sinl: - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanl: - // sin(-X) --> -sin(X) - // tan(-X) --> -tan(X) - if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) - return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X)); - break; - case LibFunc_cos: - case LibFunc_cosf: - case LibFunc_cosl: - // cos(-X) --> cos(X) - if (match(Call->getArgOperand(0), m_FNeg(m_Value(X)))) - return B.CreateCall(Call->getCalledFunction(), X, "cos"); - break; - default: - break; - } - return nullptr; -} - -static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { - // Multiplications calculated using Addition Chains. - // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html - - assert(Exp != 0 && "Incorrect exponent 0 not handled"); - - if (InnerChain[Exp]) - return InnerChain[Exp]; - - static const unsigned AddChain[33][2] = { - {0, 0}, // Unused. - {0, 0}, // Unused (base case = pow1). - {1, 1}, // Unused (pre-computed). - {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, - {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, - {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, - {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, - {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, - }; - - InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), - getPow(InnerChain, AddChain[Exp][1], B)); - return InnerChain[Exp]; -} - -/// Use exp{,2}(x * y) for pow(exp{,2}(x), y); -/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x); -/// exp2(log2(n) * x) for pow(n, x). -Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); - Module *Mod = Pow->getModule(); - Type *Ty = Pow->getType(); - bool Ignored; - - // Evaluate special cases related to a nested function as the base. - - // pow(exp(x), y) -> exp(x * y) - // pow(exp2(x), y) -> exp2(x * y) - // If exp{,2}() is used only once, it is better to fold two transcendental - // math functions into one. If used again, exp{,2}() would still have to be - // called with the original argument, then keep both original transcendental - // functions. However, this transformation is only safe with fully relaxed - // math semantics, since, besides rounding differences, it changes overflow - // and underflow behavior quite dramatically. For example: - // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf - // Whereas: - // exp(1000 * 0.001) = exp(1) - // TODO: Loosen the requirement for fully relaxed math semantics. - // TODO: Handle exp10() when more targets have it available. 
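// --- Standalone check of the overflow example above (a sketch, not part of
// this file):
//
//   #include <cmath>
//   #include <cstdio>
//   int main() {
//     double Before = pow(exp(1000.0), 0.001); // exp overflows: pow(inf, .001)
//     double After = exp(1000.0 * 0.001);      // exp(1)
//     printf("%g vs %g\n", Before, After);     // prints: inf vs 2.71828
//     return 0;
//   }
// ----------------------------------------------------------------------------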
- CallInst *BaseFn = dyn_cast<CallInst>(Base); - if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) { - LibFunc LibFn; - - Function *CalleeFn = BaseFn->getCalledFunction(); - if (CalleeFn && - TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) { - StringRef ExpName; - Intrinsic::ID ID; - Value *ExpFn; - LibFunc LibFnFloat; - LibFunc LibFnDouble; - LibFunc LibFnLongDouble; - - switch (LibFn) { - default: - return nullptr; - case LibFunc_expf: case LibFunc_exp: case LibFunc_expl: - ExpName = TLI->getName(LibFunc_exp); - ID = Intrinsic::exp; - LibFnFloat = LibFunc_expf; - LibFnDouble = LibFunc_exp; - LibFnLongDouble = LibFunc_expl; - break; - case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l: - ExpName = TLI->getName(LibFunc_exp2); - ID = Intrinsic::exp2; - LibFnFloat = LibFunc_exp2f; - LibFnDouble = LibFunc_exp2; - LibFnLongDouble = LibFunc_exp2l; - break; - } - - // Create new exp{,2}() with the product as its argument. - Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); - ExpFn = BaseFn->doesNotAccessMemory() - ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty), - FMul, ExpName) - : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat, - LibFnLongDouble, B, - BaseFn->getAttributes()); - - // Since the new exp{,2}() is different from the original one, dead code - // elimination cannot be trusted to remove it, since it may have side - // effects (e.g., errno). When the only consumer for the original - // exp{,2}() is pow(), then it has to be explicitly erased. - BaseFn->replaceAllUsesWith(ExpFn); - eraseFromParent(BaseFn); - - return ExpFn; - } - } - - // Evaluate special cases related to a constant base. - - const APFloat *BaseF; - if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) - return nullptr; - - // pow(2.0 ** n, x) -> exp2(n * x) - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { - APFloat BaseR = APFloat(1.0); - BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); - BaseR = BaseR / *BaseF; - bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); - const APFloat *NF = IsReciprocal ? &BaseR : BaseF; - APSInt NI(64, false); - if ((IsInteger || IsReciprocal) && - NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == - APFloat::opOK && - NI > 1 && NI.isPowerOf2()) { - double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); - Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); - if (Pow->doesNotAccessMemory()) - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); - else - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); - } - } - - // pow(10.0, x) -> exp10(x) - // TODO: There is no exp10() intrinsic yet, but some day there shall be one. 
- if (match(Base, m_SpecificFP(10.0)) && - hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) - return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, - LibFunc_exp10l, B, Attrs); - - // pow(n, x) -> exp2(log2(n) * x) - if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() && - Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) { - Value *Log = nullptr; - if (Ty->isFloatTy()) - Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat())); - else if (Ty->isDoubleTy()) - Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble())); - - if (Log) { - Value *FMul = B.CreateFMul(Log, Expo, "mul"); - if (Pow->doesNotAccessMemory()) { - return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), - FMul, "exp2"); - } else { - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l)) - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); - } - } - } - return nullptr; -} - -static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, - Module *M, IRBuilder<> &B, - const TargetLibraryInfo *TLI) { - // If errno is never set, then use the intrinsic for sqrt(). - if (NoErrno) { - Function *SqrtFn = - Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType()); - return B.CreateCall(SqrtFn, V, "sqrt"); - } - - // Otherwise, use the libcall for sqrt(). - if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) - // TODO: We also should check that the target can in fact lower the sqrt() - // libcall. We currently have no way to ask this question, so we ask if - // the target has a sqrt() libcall, which is not exactly the same. - return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl, B, Attrs); - - return nullptr; -} - -/// Use square root in place of pow(x, +/-0.5). -Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { - Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); - Module *Mod = Pow->getModule(); - Type *Ty = Pow->getType(); - - const APFloat *ExpoF; - if (!match(Expo, m_APFloat(ExpoF)) || - (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) - return nullptr; - - Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); - if (!Sqrt) - return nullptr; - - // Handle signed zero base by expanding to fabs(sqrt(x)). - if (!Pow->hasNoSignedZeros()) { - Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty); - Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs"); - } - - // Handle non finite base by expanding to - // (x == -infinity ? +infinity : sqrt(x)). - if (!Pow->hasNoInfs()) { - Value *PosInf = ConstantFP::getInfinity(Ty), - *NegInf = ConstantFP::getInfinity(Ty, true); - Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); - Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt); - } - - // If the exponent is negative, then get the reciprocal. 
- if (ExpoF->isNegative()) - Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); - - return Sqrt; -} - -static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, - IRBuilder<> &B) { - Value *Args[] = {Base, Expo}; - Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); - return B.CreateCall(F, Args); -} - -Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0); - Value *Expo = Pow->getArgOperand(1); - Function *Callee = Pow->getCalledFunction(); - StringRef Name = Callee->getName(); - Type *Ty = Pow->getType(); - Module *M = Pow->getModule(); - Value *Shrunk = nullptr; - bool AllowApprox = Pow->hasApproxFunc(); - bool Ignored; - - // Bail out if simplifying libcalls to pow() is disabled. - if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) - return nullptr; - - // Propagate the math semantics from the call to any created instructions. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Pow->getFastMathFlags()); - - // Shrink pow() to powf() if the arguments are single precision, - // unless the result is expected to be double precision. - if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && - hasFloatVersion(Name)) - Shrunk = optimizeBinaryDoubleFP(Pow, B, true); - - // Evaluate special cases related to the base. - - // pow(1.0, x) -> 1.0 - if (match(Base, m_FPOne())) - return Base; - - if (Value *Exp = replacePowWithExp(Pow, B)) - return Exp; - - // Evaluate special cases related to the exponent. - - // pow(x, -1.0) -> 1.0 / x - if (match(Expo, m_SpecificFP(-1.0))) - return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); - - // pow(x, +/-0.0) -> 1.0 - if (match(Expo, m_AnyZeroFP())) - return ConstantFP::get(Ty, 1.0); - - // pow(x, 1.0) -> x - if (match(Expo, m_FPOne())) - return Base; - - // pow(x, 2.0) -> x * x - if (match(Expo, m_SpecificFP(2.0))) - return B.CreateFMul(Base, Base, "square"); - - if (Value *Sqrt = replacePowWithSqrt(Pow, B)) - return Sqrt; - - // pow(x, n) -> x * x * x * ... - const APFloat *ExpoF; - if (AllowApprox && match(Expo, m_APFloat(ExpoF))) { - // We limit to a max of 7 multiplications, thus the maximum exponent is 32. - // If the exponent is an integer+0.5 we generate a call to sqrt and an - // additional fmul. - // TODO: This whole transformation should be backend specific (e.g. some - // backends might prefer libcalls or the limit for the exponent might - // be different) and it should also consider optimizing for size. - APFloat LimF(ExpoF->getSemantics(), 33.0), - ExpoA(abs(*ExpoF)); - if (ExpoA.compare(LimF) == APFloat::cmpLessThan) { - // This transformation applies to integer or integer+0.5 exponents only. - // For integer+0.5, we create a sqrt(Base) call. - Value *Sqrt = nullptr; - if (!ExpoA.isInteger()) { - APFloat Expo2 = ExpoA; - // To check if ExpoA is an integer + 0.5, we add it to itself. If there - // is no floating point exception and the result is an integer, then - // ExpoA == integer + 0.5 - if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK) - return nullptr; - - if (!Expo2.isInteger()) - return nullptr; - - Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), M, B, TLI); - } - - // We will memoize intermediate products of the Addition Chain. 
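// --- Worked example (illustrative; read off getPow's AddChain table above).
// For exponent 13, AddChain[13] = {4, 9} splits x^13 into x^4 * x^9, and each
// factor is built recursively while InnerChain memoizes every power produced:
//
//   x^2  = x   * x     (pre-computed below as "square")
//   x^4  = x^2 * x^2   (AddChain[4]  = {2, 2})
//   x^8  = x^4 * x^4   (AddChain[8]  = {4, 4})
//   x^9  = x   * x^8   (AddChain[9]  = {1, 8})
//   x^13 = x^4 * x^9   (AddChain[13] = {4, 9})
//
// Five multiplies, where a naive x * x * ... * x chain would need twelve.
// ----------------------------------------------------------------------------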
- Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Base; - InnerChain[2] = B.CreateFMul(Base, Base, "square"); - - // We cannot readily convert a non-double type (like float) to a double. - // So we first convert it to something which could be converted to double. - ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); - - // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x). - if (Sqrt) - FMul = B.CreateFMul(FMul, Sqrt); - - // If the exponent is negative, then get the reciprocal. - if (ExpoF->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); - - return FMul; - } - - APSInt IntExpo(32, /*isUnsigned=*/false); - // powf(x, n) -> powi(x, n) if n is a constant signed integer value - if (ExpoF->isInteger() && - ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == - APFloat::opOK) { - return createPowWithIntegerExponent( - Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); - } - } - - // powf(x, itofp(y)) -> powi(x, y) - if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { - Value *IntExpo = cast<Instruction>(Expo)->getOperand(0); - Value *NewExpo = nullptr; - unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); - if (isa<SIToFPInst>(Expo) && BitWidth == 32) - NewExpo = IntExpo; - else if (BitWidth < 32) - NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) - : B.CreateZExt(IntExpo, B.getInt32Ty()); - if (NewExpo) - return createPowWithIntegerExponent(Base, NewExpo, M, B); - } - - return Shrunk; -} - -Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - - Value *Op = CI->getArgOperand(0); - // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 - // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - LibFunc LdExp = LibFunc_ldexpl; - if (Op->getType()->isFloatTy()) - LdExp = LibFunc_ldexpf; - else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc_ldexp; - - if (TLI->has(LdExp)) { - Value *LdExpArg = nullptr; - if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } - - if (LdExpArg) { - Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = CI->getModule(); - FunctionCallee NewCallee = M->getOrInsertFunction( - TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty()); - CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; - } - } - return Ret; -} - -Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { - // If we can shrink the call to a float function rather than a double - // function, do that first. 
- Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) - if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) - return Ret; - - // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to - // the intrinsics for improved optimization (for example, vectorization). - // No-signed-zeros is implied by the definitions of fmax/fmin themselves. - // From the C standard draft WG14/N1256: - // "Ideally, fmax would be sensitive to the sign of zero, for example - // fmax(-0.0, +0.0) would return +0; however, implementation in software - // might be impractical." - IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF = CI->getFastMathFlags(); - FMF.setNoSignedZeros(); - B.setFastMathFlags(FMF); - - Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum - : Intrinsic::maxnum; - Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); - return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); -} - -Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - StringRef Name = Callee->getName(); - if (UnsafeFPShrink && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - - if (!CI->isFast()) - return Ret; - Value *Op1 = CI->getArgOperand(0); - auto *OpC = dyn_cast<CallInst>(Op1); - - // The earlier call must also be 'fast' in order to do these transforms. - if (!OpC || !OpC->isFast()) - return Ret; - - // log(pow(x,y)) -> y*log(x) - // This is only applicable to log, log2, log10. - if (Name != "log" && Name != "log2" && Name != "log10") - return Ret; - - IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setFast(); - B.setFastMathFlags(FMF); - - LibFunc Func; - Function *F = OpC->getCalledFunction(); - if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && - Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow)) - return B.CreateFMul(OpC->getArgOperand(1), - emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, - Callee->getAttributes()), "mul"); - - // log(exp2(y)) -> y*log(2) - if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && - TLI->has(Func) && Func == LibFunc_exp2) - return B.CreateFMul( - OpC->getArgOperand(0), - emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), - Callee->getName(), B, Callee->getAttributes()), - "logmul"); - return Ret; -} - -Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - // TODO: Once we have a way (other than checking for the existince of the - // libcall) to tell whether our target can lower @llvm.sqrt, relax the - // condition below. - if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" || - Callee->getIntrinsicID() == Intrinsic::sqrt)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - - if (!CI->isFast()) - return Ret; - - Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); - if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) - return Ret; - - // We're looking for a repeated factor in a multiplication tree, - // so we can do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). - Value *Op0 = I->getOperand(0); - Value *Op1 = I->getOperand(1); - Value *RepeatOp = nullptr; - Value *OtherOp = nullptr; - if (Op0 == Op1) { - // Simple match: the operands of the multiply are identical. 
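// Worked example of the two folds (sketch; fast-math assumed, and Y >= 0 for
// the second form): sqrt(x*x) must become fabs(x), not x — for x = -3.0,
// x*x = 9.0 and sqrt(9.0) = 3.0 = fabs(-3.0). With a leftover factor y,
// sqrt((x*x)*y) likewise becomes fabs(x) * sqrt(y).
#include <cmath>

static double foldSqrtOfSquare(double X) { return std::fabs(X); }
static double foldSqrtOfSquareMul(double X, double Y) {
  return std::fabs(X) * std::sqrt(Y);
}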
- RepeatOp = Op0; - } else { - // Look for a more complicated pattern: one of the operands is itself - // a multiply, so search for a common factor in that multiply. - // Note: We don't bother looking any deeper than this first level or for - // variations of this pattern because instcombine's visitFMUL and/or the - // reassociation pass should give us this form. - Value *OtherMul0, *OtherMul1; - if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { - // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) { - // Matched: sqrt((x * x) * z) - RepeatOp = OtherMul0; - OtherOp = Op1; - } - } - } - if (!RepeatOp) - return Ret; - - // Fast math flags for any created instructions should match the sqrt - // and multiply. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(I->getFastMathFlags()); - - // If we found a repeated factor, hoist it out of the square root and - // replace it with the fabs of that factor. - Module *M = Callee->getParent(); - Type *ArgType = I->getType(); - Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); - Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); - if (OtherOp) { - // If we found a non-repeated factor, we still need to get its square - // root. We then multiply that by the value that was simplified out - // of the square root calculation. - Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); - Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); - return B.CreateFMul(FabsCall, SqrtCall); - } - return FabsCall; -} - -// TODO: Generalize to handle any trig function and its inverse. -Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - - Value *Op1 = CI->getArgOperand(0); - auto *OpC = dyn_cast<CallInst>(Op1); - if (!OpC) - return Ret; - - // Both calls must be 'fast' in order to remove them. - if (!CI->isFast() || !OpC->isFast()) - return Ret; - - // tan(atan(x)) -> x - // tanf(atanf(x)) -> x - // tanl(atanl(x)) -> x - LibFunc Func; - Function *F = OpC->getCalledFunction(); - if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && - ((Func == LibFunc_atan && Callee->getName() == "tan") || - (Func == LibFunc_atanf && Callee->getName() == "tanf") || - (Func == LibFunc_atanl && Callee->getName() == "tanl"))) - Ret = OpC->getArgOperand(0); - return Ret; -} - -static bool isTrigLibCall(CallInst *CI) { - // We can only hope to do anything useful if we can ignore things like errno - // and floating-point exceptions. - // We already checked the prototype. - return CI->hasFnAttr(Attribute::NoUnwind) && - CI->hasFnAttr(Attribute::ReadNone); -} - -static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, - bool UseFloat, Value *&Sin, Value *&Cos, - Value *&SinCos) { - Type *ArgTy = Arg->getType(); - Type *ResTy; - StringRef Name; - - Triple T(OrigCallee->getParent()->getTargetTriple()); - if (UseFloat) { - Name = "__sincospif_stret"; - - assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); - // x86_64 can't use {float, float} since that would be returned in both - // xmm0 and xmm1, which isn't what a real struct would do. - ResTy = T.getArch() == Triple::x86_64 - ? 
static_cast<Type *>(VectorType::get(ArgTy, 2)) - : static_cast<Type *>(StructType::get(ArgTy, ArgTy)); - } else { - Name = "__sincospi_stret"; - ResTy = StructType::get(ArgTy, ArgTy); - } - - Module *M = OrigCallee->getParent(); - FunctionCallee Callee = - M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy); - - if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { - // If the argument is an instruction, it must dominate all uses so put our - // sincos call there. - B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); - } else { - // Otherwise (e.g. for a constant) the beginning of the function is as - // good a place as any. - BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); - B.SetInsertPoint(&EntryBB, EntryBB.begin()); - } - - SinCos = B.CreateCall(Callee, Arg, "sincospi"); - - if (SinCos->getType()->isStructTy()) { - Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); - Cos = B.CreateExtractValue(SinCos, 1, "cospi"); - } else { - Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), - "sinpi"); - Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), - "cospi"); - } -} - -Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) { - // Make sure the prototype is as expected, otherwise the rest of the - // function is probably invalid and likely to abort. - if (!isTrigLibCall(CI)) - return nullptr; - - Value *Arg = CI->getArgOperand(0); - SmallVector<CallInst *, 1> SinCalls; - SmallVector<CallInst *, 1> CosCalls; - SmallVector<CallInst *, 1> SinCosCalls; - - bool IsFloat = Arg->getType()->isFloatTy(); - - // Look for all compatible sinpi, cospi and sincospi calls with the same - // argument. If there are enough (in some sense) we can make the - // substitution. - Function *F = CI->getFunction(); - for (User *U : Arg->users()) - classifyArgUse(U, F, IsFloat, SinCalls, CosCalls, SinCosCalls); - - // It's only worthwhile if both sinpi and cospi are actually used. - if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) - return nullptr; - - Value *Sin, *Cos, *SinCos; - insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos); - - auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls, - Value *Res) { - for (CallInst *C : Calls) - replaceAllUsesWith(C, Res); - }; - - replaceTrigInsts(SinCalls, Sin); - replaceTrigInsts(CosCalls, Cos); - replaceTrigInsts(SinCosCalls, SinCos); - - return nullptr; -} - -void LibCallSimplifier::classifyArgUse( - Value *Val, Function *F, bool IsFloat, - SmallVectorImpl<CallInst *> &SinCalls, - SmallVectorImpl<CallInst *> &CosCalls, - SmallVectorImpl<CallInst *> &SinCosCalls) { - CallInst *CI = dyn_cast<CallInst>(Val); - - if (!CI) - return; - - // Don't consider calls in other functions. 
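// What the combined call above buys (sketch; a portable stand-in with
// hypothetical names, not the __sincospi_stret ABI): when both sinpi(x) and
// cospi(x) of the same argument are live, a single call can produce the pair.
// M_PI is assumed available from <cmath>, as on POSIX systems.
#include <cmath>

static void sincospiSketch(double X, double &Sin, double &Cos) {
  Sin = std::sin(M_PI * X);
  Cos = std::cos(M_PI * X);
}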
- if (CI->getFunction() != F) - return; - - Function *Callee = CI->getCalledFunction(); - LibFunc Func; - if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) || - !isTrigLibCall(CI)) - return; - - if (IsFloat) { - if (Func == LibFunc_sinpif) - SinCalls.push_back(CI); - else if (Func == LibFunc_cospif) - CosCalls.push_back(CI); - else if (Func == LibFunc_sincospif_stret) - SinCosCalls.push_back(CI); - } else { - if (Func == LibFunc_sinpi) - SinCalls.push_back(CI); - else if (Func == LibFunc_cospi) - CosCalls.push_back(CI); - else if (Func == LibFunc_sincospi_stret) - SinCosCalls.push_back(CI); - } -} - -//===----------------------------------------------------------------------===// -// Integer Library Call Optimizations -//===----------------------------------------------------------------------===// - -Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { - // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 - Value *Op = CI->getArgOperand(0); - Type *ArgType = Op->getType(); - Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); - V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); - V = B.CreateIntCast(V, B.getInt32Ty(), false); - - Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); - return B.CreateSelect(Cond, V, B.getInt32(0)); -} - -Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) { - // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false)) - Value *Op = CI->getArgOperand(0); - Type *ArgType = Op->getType(); - Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::ctlz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz"); - V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()), - V); - return B.CreateIntCast(V, CI->getType(), false); -} - -Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { - // abs(x) -> x <s 0 ? -x : x - // The negation has 'nsw' because abs of INT_MIN is undefined. 
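// The same select pattern in plain C++ (sketch): the negation may carry 'nsw'
// only because abs(INT_MIN) is undefined in C, so the one input for which -X
// overflows can be assumed away.
#include <climits>

static int selectAbs(int X) {
  return X < 0 ? -X : X; // -INT_MIN would overflow; callers must not pass it
}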
- Value *X = CI->getArgOperand(0); - Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType())); - Value *NegX = B.CreateNSWNeg(X, "neg"); - return B.CreateSelect(IsNeg, NegX, X); -} - -Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { - // isdigit(c) -> (c-'0') <u 10 - Value *Op = CI->getArgOperand(0); - Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); - return B.CreateZExt(Op, CI->getType()); -} - -Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { - // isascii(c) -> c <u 128 - Value *Op = CI->getArgOperand(0); - Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); - return B.CreateZExt(Op, CI->getType()); -} - -Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { - // toascii(c) -> c & 0x7f - return B.CreateAnd(CI->getArgOperand(0), - ConstantInt::get(CI->getType(), 0x7F)); -} - -Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) { - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return nullptr; - - return convertStrToNumber(CI, Str, 10); -} - -Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) { - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return nullptr; - - if (!isa<ConstantPointerNull>(CI->getArgOperand(1))) - return nullptr; - - if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) { - return convertStrToNumber(CI, Str, CInt->getSExtValue()); - } - - return nullptr; -} - -//===----------------------------------------------------------------------===// -// Formatting and IO Library Call Optimizations -//===----------------------------------------------------------------------===// - -static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg); - -Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, - int StreamArg) { - Function *Callee = CI->getCalledFunction(); - // Error reporting calls should be cold, mark them as such. - // This applies even to non-builtin calls: it is only a hint and applies to - // functions that the frontend might not understand as builtins. - - // This heuristic was suggested in: - // Improving Static Branch Prediction in a Compiler - // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu - // Proceedings of PACT'98, Oct. 1998, IEEE - if (!CI->hasFnAttr(Attribute::Cold) && - isReportingError(Callee, CI, StreamArg)) { - CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold); - } - - return nullptr; -} - -static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!Callee || !Callee->isDeclaration()) - return false; - - if (StreamArg < 0) - return true; - - // These functions might be considered cold, but only if their stream - // argument is stderr. - - if (StreamArg >= (int)CI->getNumArgOperands()) - return false; - LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); - if (!LI) - return false; - GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); - if (!GV || !GV->isDeclaration()) - return false; - return GV->getName() == "stderr"; -} - -Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return nullptr; - - // Empty format string -> noop. - if (FormatStr.empty()) // Tolerate printf's declared void. - return CI->use_empty() ? 
(Value *)CI : ConstantInt::get(CI->getType(), 0); - - // Do not do any of the following transformations if the printf return value - // is used, in general the printf return value is not compatible with either - // putchar() or puts(). - if (!CI->use_empty()) - return nullptr; - - // printf("x") -> putchar('x'), even for "%" and "%%". - if (FormatStr.size() == 1 || FormatStr == "%%") - return emitPutChar(B.getInt32(FormatStr[0]), B, TLI); - - // printf("%s", "a") --> putchar('a') - if (FormatStr == "%s" && CI->getNumArgOperands() > 1) { - StringRef ChrStr; - if (!getConstantStringInfo(CI->getOperand(1), ChrStr)) - return nullptr; - if (ChrStr.size() != 1) - return nullptr; - return emitPutChar(B.getInt32(ChrStr[0]), B, TLI); - } - - // printf("foo\n") --> puts("foo") - if (FormatStr[FormatStr.size() - 1] == '\n' && - FormatStr.find('%') == StringRef::npos) { // No format characters. - // Create a string literal with no \n on it. We expect the constant merge - // pass to be run after this pass, to merge duplicate strings. - FormatStr = FormatStr.drop_back(); - Value *GV = B.CreateGlobalString(FormatStr, "str"); - return emitPutS(GV, B, TLI); - } - - // Optimize specific format strings. - // printf("%c", chr) --> putchar(chr) - if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isIntegerTy()) - return emitPutChar(CI->getArgOperand(1), B, TLI); - - // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && - CI->getArgOperand(1)->getType()->isPointerTy()) - return emitPutS(CI->getArgOperand(1), B, TLI); - return nullptr; -} - -Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { - - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (Value *V = optimizePrintFString(CI, B)) { - return V; - } - - // printf(format, ...) -> iprintf(format, ...) if no floating point - // arguments. - if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee IPrintFFn = - M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(IPrintFFn); - B.Insert(New); - return New; - } - - // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point - // arguments. - if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_printf), - FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SmallPrintFFn); - B.Insert(New); - return New; - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return nullptr; - - // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumArgOperands() == 2) { - // Make sure there's no % in the constant array. We could try to handle - // %% -> % in the future if we cared. - if (FormatStr.find('%') != StringRef::npos) - return nullptr; // we found a format specifier, bail out. 
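// The fold that follows, restated as runnable C++ (sketch; assumes Fmt holds
// no '%'): copy the bytes plus the terminating NUL and return the length,
// which is exactly sprintf's return value for a specifier-free format.
#include <cstring>

static int sprintfNoSpecifiers(char *Dst, const char *Fmt) {
  std::size_t Len = std::strlen(Fmt);
  std::memcpy(Dst, Fmt, Len + 1); // + 1 copies the NUL byte too
  return (int)Len;
}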
- - // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1)); // Copy the null byte. - return ConstantInt::get(CI->getType(), FormatStr.size()); - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return nullptr; - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) - return nullptr; - Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(CI->getArgOperand(0), B); - B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); - B.CreateStore(B.getInt8(0), Ptr); - - return ConstantInt::get(CI->getType(), 1); - } - - if (FormatStr[1] == 's') { - // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, - // strlen(str)+1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return nullptr; - - Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); - if (!Len) - return nullptr; - Value *IncLen = - B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen); - - // The sprintf result is the unincremented number of bytes in the string. - return B.CreateIntCast(Len, CI->getType(), false); - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (Value *V = optimizeSPrintFString(CI, B)) { - return V; - } - - // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating - // point arguments. - if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee SIPrintFFn = - M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SIPrintFFn); - B.Insert(New); - return New; - } - - // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit - // floating point arguments. - if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallSPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf), - FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SmallSPrintFFn); - B.Insert(New); - return New; - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) - return nullptr; - - // Check for size - ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!Size) - return nullptr; - - uint64_t N = Size->getZExtValue(); - - // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumArgOperands() == 3) { - // Make sure there's no % in the constant array. We could try to handle - // %% -> % in the future if we cared. 
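// The return-value contract the size cases below rely on (sketch): snprintf
// always reports the would-be length, whether or not anything fits.
#include <cstdio>

static void snprintfContract() {
  char Buf[8];
  int A = std::snprintf(nullptr, 0, "abc");      // A == 3, nothing written
  int B = std::snprintf(Buf, sizeof Buf, "abc"); // B == 3, "abc\0" stored
  (void)A; (void)B;
}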
- if (FormatStr.find('%') != StringRef::npos) - return nullptr; // we found a format specifier, bail out. - - if (N == 0) - return ConstantInt::get(CI->getType(), FormatStr.size()); - else if (N < FormatStr.size() + 1) - return nullptr; - - // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, - // strlen(fmt)+1) - B.CreateMemCpy( - CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1)); // Copy the null byte. - return ConstantInt::get(CI->getType(), FormatStr.size()); - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() == 2 && FormatStr[0] == '%' && - CI->getNumArgOperands() == 4) { - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - if (N == 0) - return ConstantInt::get(CI->getType(), 1); - else if (N == 1) - return nullptr; - - // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(3)->getType()->isIntegerTy()) - return nullptr; - Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(CI->getArgOperand(0), B); - B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); - B.CreateStore(B.getInt8(0), Ptr); - - return ConstantInt::get(CI->getType(), 1); - } - - if (FormatStr[1] == 's') { - // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1) - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(3), Str)) - return nullptr; - - if (N == 0) - return ConstantInt::get(CI->getType(), Str.size()); - else if (N < Str.size() + 1) - return nullptr; - - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1, - ConstantInt::get(CI->getType(), Str.size() + 1)); - - // The snprintf result is the unincremented number of bytes in the string. - return ConstantInt::get(CI->getType(), Str.size()); - } - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) { - if (Value *V = optimizeSnPrintFString(CI, B)) { - return V; - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { - optimizeErrorReporting(CI, B, 0); - - // All the optimizations depend on the format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return nullptr; - - // Do not do any of the following transformations if the fprintf return - // value is used, in general the fprintf return value is not compatible - // with fwrite(), fputc() or fputs(). - if (!CI->use_empty()) - return nullptr; - - // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumArgOperands() == 2) { - // Could handle %% -> % if we cared. - if (FormatStr.find('%') != StringRef::npos) - return nullptr; // We found a format specifier. - - return emitFWrite( - CI->getArgOperand(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), - CI->getArgOperand(0), B, DL, TLI); - } - - // The remaining optimizations require the format string to be "%s" or "%c" - // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || - CI->getNumArgOperands() < 3) - return nullptr; - - // Decode the second character of the format string. 
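// The two rewrites that follow, stated directly (sketch; valid only because
// the unused-return check above already passed):
//   fprintf(F, "%c", C)  ==>  fputc(C, F)
//   fprintf(F, "%s", S)  ==>  fputs(S, F)
#include <cstdio>

static void fprintfFolds(std::FILE *F, int C, const char *S) {
  std::fputc(C, F); // replaces fprintf(F, "%c", C)
  std::fputs(S, F); // replaces fprintf(F, "%s", S)
}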
- if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) - return nullptr; - return emitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); - } - - if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return nullptr; - return emitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (Value *V = optimizeFPrintFString(CI, B)) { - return V; - } - - // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no - // floating point arguments. - if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - FunctionCallee FIPrintFFn = - M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(FIPrintFFn); - B.Insert(New); - return New; - } - - // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no - // 128-bit floating point arguments. - if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) { - Module *M = B.GetInsertBlock()->getParent()->getParent(); - auto SmallFPrintFFn = - M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf), - FT, Callee->getAttributes()); - CallInst *New = cast<CallInst>(CI->clone()); - New->setCalledFunction(SmallFPrintFFn); - B.Insert(New); - return New; - } - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { - optimizeErrorReporting(CI, B, 3); - - // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (SizeC && CountC) { - uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue(); - - // If this is writing zero records, remove the call (it's a noop). - if (Bytes == 0) - return ConstantInt::get(CI->getType(), 0); - - // If this is writing one byte, turn it into fputc. - // This optimisation is only valid, if the return value is unused. - if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(B.getInt8Ty(), - castToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; - } - } - - if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI)) - return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), CI->getArgOperand(3), B, DL, - TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { - optimizeErrorReporting(CI, B, 1); - - // Don't rewrite fputs to fwrite when optimising for size because fwrite - // requires more arguments and thus extra MOVs are required. - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - if (OptForSize) - return nullptr; - - // Check if has any use - if (!CI->use_empty()) { - if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI)) - return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI); - else - // We can't optimize if return value is used. 
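// The fold performed once control passes the early-outs above (sketch): with
// the string length statically known, fputs becomes a single fwrite of
// strlen(s) bytes. GetStringLength counts the terminating NUL, hence the
// 'Len - 1' in the code that follows.
#include <cstdio>
#include <cstring>

static void fputsAsFWrite(const char *S, std::FILE *F) {
  std::fwrite(S, std::strlen(S), 1, F);
}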
- return nullptr; - } - - // fputs(s,F) --> fwrite(s,strlen(s),1,F) - uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) - return nullptr; - - // Known to have no uses (see above). - return emitFWrite( - CI->getArgOperand(0), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), - CI->getArgOperand(1), B, DL, TLI); -} - -Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) { - optimizeErrorReporting(CI, B, 1); - - if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI)) - return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B, - TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) { - if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI)) - return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) { - if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI)) - return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { - if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI)) - return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), CI->getArgOperand(3), B, DL, - TLI); - - return nullptr; -} - -Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { - if (!CI->use_empty()) - return nullptr; - - // Check for a constant string. - // puts("") -> putchar('\n') - StringRef Str; - if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) - return emitPutChar(B.getInt32('\n'), B, TLI); - - return nullptr; -} - -bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { - LibFunc Func; - SmallString<20> FloatFuncName = FuncName; - FloatFuncName += 'f'; - if (TLI->getLibFunc(FloatFuncName, Func)) - return TLI->has(Func); - return false; -} - -Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, - IRBuilder<> &Builder) { - LibFunc Func; - Function *Callee = CI->getCalledFunction(); - // Check for string/memory library functions. - if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { - // Make sure we never change the calling convention. 
- assert((ignoreCallingConv(Func) || - isCallingConvCCompatible(CI)) && - "Optimizing string/memory libcall would change the calling convention"); - switch (Func) { - case LibFunc_strcat: - return optimizeStrCat(CI, Builder); - case LibFunc_strncat: - return optimizeStrNCat(CI, Builder); - case LibFunc_strchr: - return optimizeStrChr(CI, Builder); - case LibFunc_strrchr: - return optimizeStrRChr(CI, Builder); - case LibFunc_strcmp: - return optimizeStrCmp(CI, Builder); - case LibFunc_strncmp: - return optimizeStrNCmp(CI, Builder); - case LibFunc_strcpy: - return optimizeStrCpy(CI, Builder); - case LibFunc_stpcpy: - return optimizeStpCpy(CI, Builder); - case LibFunc_strncpy: - return optimizeStrNCpy(CI, Builder); - case LibFunc_strlen: - return optimizeStrLen(CI, Builder); - case LibFunc_strpbrk: - return optimizeStrPBrk(CI, Builder); - case LibFunc_strtol: - case LibFunc_strtod: - case LibFunc_strtof: - case LibFunc_strtoul: - case LibFunc_strtoll: - case LibFunc_strtold: - case LibFunc_strtoull: - return optimizeStrTo(CI, Builder); - case LibFunc_strspn: - return optimizeStrSpn(CI, Builder); - case LibFunc_strcspn: - return optimizeStrCSpn(CI, Builder); - case LibFunc_strstr: - return optimizeStrStr(CI, Builder); - case LibFunc_memchr: - return optimizeMemChr(CI, Builder); - case LibFunc_bcmp: - return optimizeBCmp(CI, Builder); - case LibFunc_memcmp: - return optimizeMemCmp(CI, Builder); - case LibFunc_memcpy: - return optimizeMemCpy(CI, Builder); - case LibFunc_memmove: - return optimizeMemMove(CI, Builder); - case LibFunc_memset: - return optimizeMemSet(CI, Builder); - case LibFunc_realloc: - return optimizeRealloc(CI, Builder); - case LibFunc_wcslen: - return optimizeWcslen(CI, Builder); - default: - break; - } - } - return nullptr; -} - -Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, - LibFunc Func, - IRBuilder<> &Builder) { - // Don't optimize calls that require strict floating point semantics. 
- if (CI->isStrictFP()) - return nullptr; - - if (Value *V = optimizeTrigReflections(CI, Func, Builder)) - return V; - - switch (Func) { - case LibFunc_sinpif: - case LibFunc_sinpi: - case LibFunc_cospif: - case LibFunc_cospi: - return optimizeSinCosPi(CI, Builder); - case LibFunc_powf: - case LibFunc_pow: - case LibFunc_powl: - return optimizePow(CI, Builder); - case LibFunc_exp2l: - case LibFunc_exp2: - case LibFunc_exp2f: - return optimizeExp2(CI, Builder); - case LibFunc_fabsf: - case LibFunc_fabs: - case LibFunc_fabsl: - return replaceUnaryCall(CI, Builder, Intrinsic::fabs); - case LibFunc_sqrtf: - case LibFunc_sqrt: - case LibFunc_sqrtl: - return optimizeSqrt(CI, Builder); - case LibFunc_log: - case LibFunc_log10: - case LibFunc_log1p: - case LibFunc_log2: - case LibFunc_logb: - return optimizeLog(CI, Builder); - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanl: - return optimizeTan(CI, Builder); - case LibFunc_ceil: - return replaceUnaryCall(CI, Builder, Intrinsic::ceil); - case LibFunc_floor: - return replaceUnaryCall(CI, Builder, Intrinsic::floor); - case LibFunc_round: - return replaceUnaryCall(CI, Builder, Intrinsic::round); - case LibFunc_nearbyint: - return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); - case LibFunc_rint: - return replaceUnaryCall(CI, Builder, Intrinsic::rint); - case LibFunc_trunc: - return replaceUnaryCall(CI, Builder, Intrinsic::trunc); - case LibFunc_acos: - case LibFunc_acosh: - case LibFunc_asin: - case LibFunc_asinh: - case LibFunc_atan: - case LibFunc_atanh: - case LibFunc_cbrt: - case LibFunc_cosh: - case LibFunc_exp: - case LibFunc_exp10: - case LibFunc_expm1: - case LibFunc_cos: - case LibFunc_sin: - case LibFunc_sinh: - case LibFunc_tanh: - if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName())) - return optimizeUnaryDoubleFP(CI, Builder, true); - return nullptr; - case LibFunc_copysign: - if (hasFloatVersion(CI->getCalledFunction()->getName())) - return optimizeBinaryDoubleFP(CI, Builder); - return nullptr; - case LibFunc_fminf: - case LibFunc_fmin: - case LibFunc_fminl: - case LibFunc_fmaxf: - case LibFunc_fmax: - case LibFunc_fmaxl: - return optimizeFMinFMax(CI, Builder); - case LibFunc_cabs: - case LibFunc_cabsf: - case LibFunc_cabsl: - return optimizeCAbs(CI, Builder); - default: - return nullptr; - } -} - -Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - // TODO: Split out the code below that operates on FP calls so that - // we can all non-FP calls with the StrictFP attribute to be - // optimized. - if (CI->isNoBuiltin()) - return nullptr; - - LibFunc Func; - Function *Callee = CI->getCalledFunction(); - - SmallVector<OperandBundleDef, 2> OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles); - bool isCallingConvC = isCallingConvCCompatible(CI); - - // Command-line parameter overrides instruction attribute. - // This can't be moved to optimizeFloatingPointLibCall() because it may be - // used by the intrinsic optimizations. - if (EnableUnsafeFPShrink.getNumOccurrences() > 0) - UnsafeFPShrink = EnableUnsafeFPShrink; - else if (isa<FPMathOperator>(CI) && CI->isFast()) - UnsafeFPShrink = true; - - // First, check for intrinsics. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { - if (!isCallingConvC) - return nullptr; - // The FP intrinsics have corresponding constrained versions so we don't - // need to check for the StrictFP attribute here. 
- switch (II->getIntrinsicID()) { - case Intrinsic::pow: - return optimizePow(CI, Builder); - case Intrinsic::exp2: - return optimizeExp2(CI, Builder); - case Intrinsic::log: - return optimizeLog(CI, Builder); - case Intrinsic::sqrt: - return optimizeSqrt(CI, Builder); - // TODO: Use foldMallocMemset() with memset intrinsic. - default: - return nullptr; - } - } - - // Also try to simplify calls to fortified library functions. - if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { - // Try to further simplify the result. - CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); - if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { - // Use an IR Builder from SimplifiedCI if available instead of CI - // to guarantee we reach all uses we might replace later on. - IRBuilder<> TmpBuilder(SimplifiedCI); - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { - // If we were able to further simplify, remove the now redundant call. - SimplifiedCI->replaceAllUsesWith(V); - eraseFromParent(SimplifiedCI); - return V; - } - } - return SimplifiedFortifiedCI; - } - - // Then check for known library functions. - if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { - // We never change the calling convention. - if (!ignoreCallingConv(Func) && !isCallingConvC) - return nullptr; - if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) - return V; - if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder)) - return V; - switch (Func) { - case LibFunc_ffs: - case LibFunc_ffsl: - case LibFunc_ffsll: - return optimizeFFS(CI, Builder); - case LibFunc_fls: - case LibFunc_flsl: - case LibFunc_flsll: - return optimizeFls(CI, Builder); - case LibFunc_abs: - case LibFunc_labs: - case LibFunc_llabs: - return optimizeAbs(CI, Builder); - case LibFunc_isdigit: - return optimizeIsDigit(CI, Builder); - case LibFunc_isascii: - return optimizeIsAscii(CI, Builder); - case LibFunc_toascii: - return optimizeToAscii(CI, Builder); - case LibFunc_atoi: - case LibFunc_atol: - case LibFunc_atoll: - return optimizeAtoi(CI, Builder); - case LibFunc_strtol: - case LibFunc_strtoll: - return optimizeStrtol(CI, Builder); - case LibFunc_printf: - return optimizePrintF(CI, Builder); - case LibFunc_sprintf: - return optimizeSPrintF(CI, Builder); - case LibFunc_snprintf: - return optimizeSnPrintF(CI, Builder); - case LibFunc_fprintf: - return optimizeFPrintF(CI, Builder); - case LibFunc_fwrite: - return optimizeFWrite(CI, Builder); - case LibFunc_fread: - return optimizeFRead(CI, Builder); - case LibFunc_fputs: - return optimizeFPuts(CI, Builder); - case LibFunc_fgets: - return optimizeFGets(CI, Builder); - case LibFunc_fputc: - return optimizeFPutc(CI, Builder); - case LibFunc_fgetc: - return optimizeFGetc(CI, Builder); - case LibFunc_puts: - return optimizePuts(CI, Builder); - case LibFunc_perror: - return optimizeErrorReporting(CI, Builder); - case LibFunc_vfprintf: - case LibFunc_fiprintf: - return optimizeErrorReporting(CI, Builder, 0); - default: - return nullptr; - } - } - return nullptr; -} - -LibCallSimplifier::LibCallSimplifier( - const DataLayout &DL, const TargetLibraryInfo *TLI, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - function_ref<void(Instruction *, Value *)> Replacer, - function_ref<void(Instruction *)> Eraser) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), - UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {} - -void LibCallSimplifier::replaceAllUsesWith(Instruction *I, 
Value *With) { - // Indirect through the replacer used in this instance. - Replacer(I, With); -} - -void LibCallSimplifier::eraseFromParent(Instruction *I) { - Eraser(I); -} - -// TODO: -// Additional cases that we need to add to this file: -// -// cbrt: -// * cbrt(expN(X)) -> expN(x/3) -// * cbrt(sqrt(x)) -> pow(x,1/6) -// * cbrt(cbrt(x)) -> pow(x,1/9) -// -// exp, expf, expl: -// * exp(log(x)) -> x -// -// log, logf, logl: -// * log(exp(x)) -> x -// * log(exp(y)) -> y*log(e) -// * log(exp10(y)) -> y*log(10) -// * log(sqrt(x)) -> 0.5*log(x) -// -// pow, powf, powl: -// * pow(sqrt(x),y) -> pow(x,y*0.5) -// * pow(pow(x,y),z)-> pow(x,y*z) -// -// signbit: -// * signbit(cnst) -> cnst' -// * signbit(nncst) -> 0 (if pstv is a non-negative constant) -// -// sqrt, sqrtf, sqrtl: -// * sqrt(expN(x)) -> expN(x*0.5) -// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) -// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) -// - -//===----------------------------------------------------------------------===// -// Fortified Library Call Optimizations -//===----------------------------------------------------------------------===// - -bool -FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, - unsigned ObjSizeOp, - Optional<unsigned> SizeOp, - Optional<unsigned> StrOp, - Optional<unsigned> FlagOp) { - // If this function takes a flag argument, the implementation may use it to - // perform extra checks. Don't fold into the non-checking variant. - if (FlagOp) { - ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp)); - if (!Flag || !Flag->isZero()) - return false; - } - - if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp)) - return true; - - if (ConstantInt *ObjSizeCI = - dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isMinusOne()) - return true; - // If the object size wasn't -1 (unknown), bail out if we were asked to. - if (OnlyLowerUnknownSize) - return false; - if (StrOp) { - uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); - // If the length is 0 we don't know how long it is and so we can't - // remove the check. - if (Len == 0) - return false; - return ObjSizeCI->getZExtValue() >= Len; - } - - if (SizeOp) { - if (ConstantInt *SizeCI = - dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp))) - return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); - } - } - return false; -} - -Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, - IRBuilder<> &B) { - // TODO: Try foldMallocMemset() here. 
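// Shape of every fortified fold in this section (sketch): a checking call
// such as __memcpy_chk(dst, src, len, objsize) degrades to the plain libc
// call once the check provably cannot fire — objsize == -1 (statically
// unknown) or objsize >= len, the conditions encoded by
// isFortifiedCallFoldable above.
#include <cstddef>
#include <cstring>

static void *memcpyChkFolded(void *Dst, const void *Src, std::size_t Len) {
  return std::memcpy(Dst, Src, Len); // the runtime check has been discharged
}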
- - if (isFortifiedCallFoldable(CI, 3, 2)) { - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, - IRBuilder<> &B, - LibFunc Func) { - const DataLayout &DL = CI->getModule()->getDataLayout(); - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), - *ObjSize = CI->getArgOperand(2); - - // __stpcpy_chk(x,x,...) -> x+strlen(x) - if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { - Value *StrLen = emitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; - } - - // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our - // st[rp]cpy_chk call which may fail at runtime if the size is too long. - // TODO: It might be nice to get a maximum length out of the possible - // string lengths for varying. - if (isFortifiedCallFoldable(CI, 2, None, 1)) { - if (Func == LibFunc_strcpy_chk) - return emitStrCpy(Dst, Src, B, TLI); - else - return emitStpCpy(Dst, Src, B, TLI); - } - - if (OnlyLowerUnknownSize) - return nullptr; - - // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; - - Type *SizeTTy = DL.getIntPtrType(CI->getContext()); - Value *LenV = ConstantInt::get(SizeTTy, Len); - Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); - // If the function was an __stpcpy_chk, and we were able to fold it into - // a __memcpy_chk, we still need to return the correct end pointer. - if (Ret && Func == LibFunc_stpcpy_chk) - return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); - return Ret; -} - -Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, - IRBuilder<> &B, - LibFunc Func) { - if (isFortifiedCallFoldable(CI, 3, 2)) { - if (Func == LibFunc_strncpy_chk) - return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - else - return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - } - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 4, 3)) - return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), CI->getArgOperand(3), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { - SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end()); - return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), VariadicArgs, B, TLI); - } - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { - SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end()); - return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, - B, TLI); - } - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 2)) - return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI); - - return nullptr; -} - -Value 
*FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3)) - return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3)) - return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) - return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(4), CI->getArgOperand(5), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, - IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 2, None, None, 1)) - return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), - CI->getArgOperand(4), B, TLI); - - return nullptr; -} - -Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { - // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here. - // Some clang users checked for _chk libcall availability using: - // __has_builtin(__builtin___memcpy_chk) - // When compiling with -fno-builtin, this is always true. - // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we - // end up with fortified libcalls, which isn't acceptable in a freestanding - // environment which only provides their non-fortified counterparts. - // - // Until we change clang and/or teach external users to check for availability - // differently, disregard the "nobuiltin" attribute and TLI::has. - // - // PR23093. - - LibFunc Func; - Function *Callee = CI->getCalledFunction(); - - SmallVector<OperandBundleDef, 2> OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles); - bool isCallingConvC = isCallingConvCCompatible(CI); - - // First, check that this is a known library functions and that the prototype - // is correct. - if (!TLI->getLibFunc(*Callee, Func)) - return nullptr; - - // We never change the calling convention. 
- if (!ignoreCallingConv(Func) && !isCallingConvC) - return nullptr; - - switch (Func) { - case LibFunc_memcpy_chk: - return optimizeMemCpyChk(CI, Builder); - case LibFunc_memmove_chk: - return optimizeMemMoveChk(CI, Builder); - case LibFunc_memset_chk: - return optimizeMemSetChk(CI, Builder); - case LibFunc_stpcpy_chk: - case LibFunc_strcpy_chk: - return optimizeStrpCpyChk(CI, Builder, Func); - case LibFunc_stpncpy_chk: - case LibFunc_strncpy_chk: - return optimizeStrpNCpyChk(CI, Builder, Func); - case LibFunc_memccpy_chk: - return optimizeMemCCpyChk(CI, Builder); - case LibFunc_snprintf_chk: - return optimizeSNPrintfChk(CI, Builder); - case LibFunc_sprintf_chk: - return optimizeSPrintfChk(CI, Builder); - case LibFunc_strcat_chk: - return optimizeStrCatChk(CI, Builder); - case LibFunc_strlcat_chk: - return optimizeStrLCat(CI, Builder); - case LibFunc_strncat_chk: - return optimizeStrNCatChk(CI, Builder); - case LibFunc_strlcpy_chk: - return optimizeStrLCpyChk(CI, Builder); - case LibFunc_vsnprintf_chk: - return optimizeVSNPrintfChk(CI, Builder); - case LibFunc_vsprintf_chk: - return optimizeVSPrintfChk(CI, Builder); - default: - break; - } - return nullptr; -} - -FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( - const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) - : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} diff --git a/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp deleted file mode 100644 index 1519751197d2..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- SizeOpts.cpp - code size optimization related code ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains some shared code size optimization related code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/Utils/SizeOpts.h" -using namespace llvm; - -static cl::opt<bool> ProfileGuidedSizeOpt( - "pgso", cl::Hidden, cl::init(true), - cl::desc("Enable the profile guided size optimization. ")); - -bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - assert(F); - if (!PSI || !BFI || !PSI->hasProfileSummary()) - return false; - return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI); -} - -bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - assert(BB); - if (!PSI || !BFI || !PSI->hasProfileSummary()) - return false; - return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI); -} diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp deleted file mode 100644 index e2c387cb8983..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp +++ /dev/null @@ -1,284 +0,0 @@ -//===- SplitModule.cpp - Split a module into partitions -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the function llvm::SplitModule, which splits a module -// into multiple linkable partitions. It can be used to implement parallel code -// generation for link-time optimization. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SplitModule.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Comdat.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalIndirectSymbol.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <algorithm> -#include <cassert> -#include <iterator> -#include <memory> -#include <queue> -#include <utility> -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "split-module" - -namespace { - -using ClusterMapType = EquivalenceClasses<const GlobalValue *>; -using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>; -using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>; - -} // end anonymous namespace - -static void addNonConstUser(ClusterMapType &GVtoClusterMap, - const GlobalValue *GV, const User *U) { - assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user"); - - if (const Instruction *I = dyn_cast<Instruction>(U)) { - const GlobalValue *F = I->getParent()->getParent(); - GVtoClusterMap.unionSets(GV, F); - } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) || - isa<GlobalVariable>(U)) { - GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U)); - } else { - llvm_unreachable("Underimplemented use case"); - } -} - -// Adds all GlobalValue users of V to the same cluster as GV. -static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, - const GlobalValue *GV, const Value *V) { - for (auto *U : V->users()) { - SmallVector<const User *, 4> Worklist; - Worklist.push_back(U); - while (!Worklist.empty()) { - const User *UU = Worklist.pop_back_val(); - // For each constant that is not a GV (a pure const) recurse. - if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) { - Worklist.append(UU->user_begin(), UU->user_end()); - continue; - } - addNonConstUser(GVtoClusterMap, GV, UU); - } - } -} - -// Find partitions for module in the way that no locals need to be -// globalized. -// Try to balance pack those partitions into N files since this roughly equals -// thread balancing for the backend codegen step. -static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, - unsigned N) { - // At this point module should have the proper mix of globals and locals. - // As we attempt to partition this module, we must not change any - // locals to globals. 
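// The balancing scheme used further down, in isolation (sketch with
// hypothetical names): keep one (size, partition) slot per output module in a
// min-heap, and hand each cluster — visited largest-first — to whichever
// partition is currently smallest.
#include <cstddef>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

static std::vector<unsigned> assignClusters(const std::vector<unsigned> &Sizes,
                                            unsigned N) {
  using Slot = std::pair<unsigned, unsigned>; // (current size, partition id)
  std::priority_queue<Slot, std::vector<Slot>, std::greater<Slot>> Q;
  for (unsigned I = 0; I < N; ++I)
    Q.push({0, I});
  std::vector<unsigned> PartOf(Sizes.size());
  for (std::size_t C = 0; C < Sizes.size(); ++C) { // Sizes sorted descending
    Slot S = Q.top();
    Q.pop();
    PartOf[C] = S.second;
    Q.push({S.first + Sizes[C], S.second});
  }
  return PartOf;
}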
- LLVM_DEBUG(dbgs() << "Partition module with (" << M->size() - << ")functions\n"); - ClusterMapType GVtoClusterMap; - ComdatMembersType ComdatMembers; - - auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) { - if (GV.isDeclaration()) - return; - - if (!GV.hasName()) - GV.setName("__llvmsplit_unnamed"); - - // Comdat groups must not be partitioned. For comdat groups that contain - // locals, record all their members here so we can keep them together. - // Comdat groups that only contain external globals are already handled by - // the MD5-based partitioning. - if (const Comdat *C = GV.getComdat()) { - auto &Member = ComdatMembers[C]; - if (Member) - GVtoClusterMap.unionSets(Member, &GV); - else - Member = &GV; - } - - // For aliases we should not separate them from their aliasees regardless - // of linkage. - if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) { - if (const GlobalObject *Base = GIS->getBaseObject()) - GVtoClusterMap.unionSets(&GV, Base); - } - - if (const Function *F = dyn_cast<Function>(&GV)) { - for (const BasicBlock &BB : *F) { - BlockAddress *BA = BlockAddress::lookup(&BB); - if (!BA || !BA->isConstantUsed()) - continue; - addAllGlobalValueUsers(GVtoClusterMap, F, BA); - } - } - - if (GV.hasLocalLinkage()) - addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); - }; - - llvm::for_each(M->functions(), recordGVSet); - llvm::for_each(M->globals(), recordGVSet); - llvm::for_each(M->aliases(), recordGVSet); - - // Assigned all GVs to merged clusters while balancing number of objects in - // each. - auto CompareClusters = [](const std::pair<unsigned, unsigned> &a, - const std::pair<unsigned, unsigned> &b) { - if (a.second || b.second) - return a.second > b.second; - else - return a.first > b.first; - }; - - std::priority_queue<std::pair<unsigned, unsigned>, - std::vector<std::pair<unsigned, unsigned>>, - decltype(CompareClusters)> - BalancinQueue(CompareClusters); - // Pre-populate priority queue with N slot blanks. - for (unsigned i = 0; i < N; ++i) - BalancinQueue.push(std::make_pair(i, 0)); - - using SortType = std::pair<unsigned, ClusterMapType::iterator>; - - SmallVector<SortType, 64> Sets; - SmallPtrSet<const GlobalValue *, 32> Visited; - - // To guarantee determinism, we have to sort SCC according to size. - // When size is the same, use leader's name. - for (ClusterMapType::iterator I = GVtoClusterMap.begin(), - E = GVtoClusterMap.end(); I != E; ++I) - if (I->isLeader()) - Sets.push_back( - std::make_pair(std::distance(GVtoClusterMap.member_begin(I), - GVtoClusterMap.member_end()), I)); - - llvm::sort(Sets, [](const SortType &a, const SortType &b) { - if (a.first == b.first) - return a.second->getData()->getName() > b.second->getData()->getName(); - else - return a.first > b.first; - }); - - for (auto &I : Sets) { - unsigned CurrentClusterID = BalancinQueue.top().first; - unsigned CurrentClusterSize = BalancinQueue.top().second; - BalancinQueue.pop(); - - LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" - << I.first << ") ----> " << I.second->getData()->getName() - << "\n"); - - for (ClusterMapType::member_iterator MI = - GVtoClusterMap.findLeader(I.second); - MI != GVtoClusterMap.member_end(); ++MI) { - if (!Visited.insert(*MI).second) - continue; - LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName() - << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n"); - Visited.insert(*MI); - ClusterIDMap[*MI] = CurrentClusterID; - CurrentClusterSize++; - } - // Add this set size to the number of entries in this cluster. 
- BalancingQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize)); - } -} - -static void externalize(GlobalValue *GV) { - if (GV->hasLocalLinkage()) { - GV->setLinkage(GlobalValue::ExternalLinkage); - GV->setVisibility(GlobalValue::HiddenVisibility); - } - - // Unnamed entities must be named consistently between modules. setName will - // give a distinct name to each such entity. - if (!GV->hasName()) - GV->setName("__llvmsplit_unnamed"); -} - -// Returns whether GV should be in partition (0-based) I of N. -static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { - if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV)) - if (const GlobalObject *Base = GIS->getBaseObject()) - GV = Base; - - StringRef Name; - if (const Comdat *C = GV->getComdat()) - Name = C->getName(); - else - Name = GV->getName(); - - // Partition by MD5 hash. We only need a few bits for evenness as the number - // of partitions will generally be in the 1-2 figure range; the low 16 bits - // are enough. - MD5 H; - MD5::MD5Result R; - H.update(Name); - H.final(R); - return (R[0] | (R[1] << 8)) % N == I; -} - -void llvm::SplitModule( - std::unique_ptr<Module> M, unsigned N, - function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback, - bool PreserveLocals) { - if (!PreserveLocals) { - for (Function &F : *M) - externalize(&F); - for (GlobalVariable &GV : M->globals()) - externalize(&GV); - for (GlobalAlias &GA : M->aliases()) - externalize(&GA); - for (GlobalIFunc &GIF : M->ifuncs()) - externalize(&GIF); - } - - // This performs splitting without a need for externalization, which might not - // always be possible. - ClusterIDMapType ClusterIDMap; - findPartitions(M.get(), ClusterIDMap, N); - - // FIXME: We should be able to reuse M as the last partition instead of - // cloning it. - for (unsigned I = 0; I < N; ++I) { - ValueToValueMapTy VMap; - std::unique_ptr<Module> MPart( - CloneModule(*M, VMap, [&](const GlobalValue *GV) { - if (ClusterIDMap.count(GV)) - return (ClusterIDMap[GV] == I); - else - return isInPartition(GV, I, N); - })); - if (I != 0) - MPart->setModuleInlineAsm(""); - ModuleCallback(std::move(MPart)); - } -} diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp deleted file mode 100644 index 50844cf9d1c5..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ /dev/null @@ -1,75 +0,0 @@ -//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a little utility pass that removes the gc.relocates inserted by -// RewriteStatepointsForGC. Note that the generated IR is incorrect, -// but this is useful as a single pass in itself, for analysis of IR without -// the gc.relocates. The statepoint and gc.result intrinsics would still be -// present.
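-// -// For illustration, a hypothetical fragment before the pass: -// -//   %tok = call token @llvm.experimental.gc.statepoint...(...) -//   %new = call i8 addrspace(1)* @llvm.experimental.gc.relocate(token %tok, ...) -//   call void @use(i8 addrspace(1)* %new) -// -// After the pass, @use takes the original derived pointer directly (with a -// bitcast inserted if the types differ), while the statepoint call remains.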
-//===----------------------------------------------------------------------===// - -#include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Statepoint.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -struct StripGCRelocates : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - StripGCRelocates() : FunctionPass(ID) { - initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &Info) const override {} - - bool runOnFunction(Function &F) override; - -}; -char StripGCRelocates::ID = 0; -} - -bool StripGCRelocates::runOnFunction(Function &F) { - // Nothing to do for declarations. - if (F.isDeclaration()) - return false; - SmallVector<GCRelocateInst *, 20> GCRelocates; - // TODO: We currently do not handle gc.relocates that are in landing pads, - // i.e. not bound to a single statepoint token. - for (Instruction &I : instructions(F)) { - if (auto *GCR = dyn_cast<GCRelocateInst>(&I)) - if (isStatepoint(GCR->getOperand(0))) - GCRelocates.push_back(GCR); - } - // All gc.relocates are bound to a single statepoint token. The order of - // visiting gc.relocates for deletion does not matter. - for (GCRelocateInst *GCRel : GCRelocates) { - Value *OrigPtr = GCRel->getDerivedPtr(); - Value *ReplaceGCRel = OrigPtr; - - // All gc_relocates are i8 addrspace(1)* typed, so we need a bitcast from i8 - // addrspace(1)* to the type of the OrigPtr, if they are not the same. - if (GCRel->getType() != OrigPtr->getType()) - ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel); - - // Replace all uses of gc.relocate and delete the gc.relocate. - // There may be unnecessary bitcasts back to the OrigPtr type; an instcombine - // pass would clean these up. - GCRel->replaceAllUsesWith(ReplaceGCRel); - GCRel->eraseFromParent(); - } - return !GCRelocates.empty(); -} - -INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates", - "Strip gc.relocates inserted through RewriteStatepointsForGC", - true, false) diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp deleted file mode 100644 index 97a4533fabe5..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ /dev/null @@ -1,41 +0,0 @@ -//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/DebugInfo.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils.h" -using namespace llvm; - -namespace { - -/// This pass strips all debug info that is not related to line tables. -/// The result will be the same as if the program were compiled with -/// -gline-tables-only.
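-/// For example (hypothetical): an instruction's !dbg location such as -/// !DILocation(line: 4, scope: !7) is kept, while llvm.dbg.value and -/// llvm.dbg.declare intrinsics and their !DILocalVariable operands are -/// removed, mirroring what -gline-tables-only emits.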
-struct StripNonLineTableDebugInfo : public ModulePass { - static char ID; // Pass identification, replacement for typeid - StripNonLineTableDebugInfo() : ModulePass(ID) { - initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - bool runOnModule(Module &M) override { - return llvm::stripNonLineTableDebugInfo(M); - } -}; -} - -char StripNonLineTableDebugInfo::ID = 0; -INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo", - "Strip all debug info except linetables", false, false) - -ModulePass *llvm::createStripNonLineTableDebugInfoPass() { - return new StripNonLineTableDebugInfo(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp deleted file mode 100644 index 456724779b43..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ /dev/null @@ -1,584 +0,0 @@ -//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within -// existing code. It is implemented as a compiler pass and is configured via a -// YAML configuration file. -// -// The YAML configuration file format is as follows: -// -// RewriteMapFile := RewriteDescriptors -// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors -// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}' -// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields -// RewriteDescriptorField := FieldIdentifier ':' FieldValue ',' -// RewriteDescriptorType := Identifier -// FieldIdentifier := Identifier -// FieldValue := Identifier -// Identifier := [0-9a-zA-Z]+ -// -// Currently, the following descriptor types are supported: -// -// - function: (function rewriting) -// + Source (original name of the function) -// + Target (explicit transformation) -// + Transform (pattern transformation) -// + Naked (boolean, whether the function is undecorated) -// - global variable: (external linkage global variable rewriting) -// + Source (original name of externally visible variable) -// + Target (explicit transformation) -// + Transform (pattern transformation) -// - global alias: (global alias rewriting) -// + Source (original name of the aliased name) -// + Target (explicit transformation) -// + Transform (pattern transformation) -// -// Note that Source and exactly one of [Target, Transform] must be provided. -// -// New rewrite descriptors can be created. Adding a new rewrite descriptor -// involves: -// -// a) extending the rewrite descriptor kind enumeration -// (<anonymous>::RewriteDescriptor::RewriteDescriptorType) -// b) implementing the new descriptor -// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor) -// c) extending the rewrite map parser -// (<anonymous>::RewriteMapParser::parseEntry) -// -// To rewrite symbols, pass the `-rewrite-symbols` option, and specify the map -// file to use for the rewriting via the `-rewrite-map-file` option.
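-// -// For example, a hypothetical rewrite map: -// -//   function: { -//     source: foo, -//     target: bar, -//   } -//   global variable: { -//     source: "^sym_(.*)$", -//     transform: "usym_\1", -//   } -// -// renames the function foo to bar, and renames every global variable -// matching sym_* to the corresponding usym_* name.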
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/IR/Comdat.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Value.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/YAMLParser.h" -#include <memory> -#include <string> -#include <vector> - -using namespace llvm; -using namespace SymbolRewriter; - -#define DEBUG_TYPE "symbol-rewriter" - -static cl::list<std::string> RewriteMapFiles("rewrite-map-file", - cl::desc("Symbol Rewrite Map"), - cl::value_desc("filename"), - cl::Hidden); - -static void rewriteComdat(Module &M, GlobalObject *GO, - const std::string &Source, - const std::string &Target) { - if (Comdat *CD = GO->getComdat()) { - auto &Comdats = M.getComdatSymbolTable(); - - Comdat *C = M.getOrInsertComdat(Target); - C->setSelectionKind(CD->getSelectionKind()); - GO->setComdat(C); - - Comdats.erase(Comdats.find(Source)); - } -} - -namespace { - -template <RewriteDescriptor::Type DT, typename ValueType, - ValueType *(Module::*Get)(StringRef) const> -class ExplicitRewriteDescriptor : public RewriteDescriptor { -public: - const std::string Source; - const std::string Target; - - ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked) - : RewriteDescriptor(DT), Source(Naked ? 
StringRef("\01" + S.str()) : S), - Target(T) {} - - bool performOnModule(Module &M) override; - - static bool classof(const RewriteDescriptor *RD) { - return RD->getType() == DT; - } -}; - -} // end anonymous namespace - -template <RewriteDescriptor::Type DT, typename ValueType, - ValueType *(Module::*Get)(StringRef) const> -bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { - bool Changed = false; - if (ValueType *S = (M.*Get)(Source)) { - if (GlobalObject *GO = dyn_cast<GlobalObject>(S)) - rewriteComdat(M, GO, Source, Target); - - if (Value *T = (M.*Get)(Target)) - S->setValueName(T->getValueName()); - else - S->setName(Target); - - Changed = true; - } - return Changed; -} - -namespace { - -template <RewriteDescriptor::Type DT, typename ValueType, - ValueType *(Module::*Get)(StringRef) const, - iterator_range<typename iplist<ValueType>::iterator> - (Module::*Iterator)()> -class PatternRewriteDescriptor : public RewriteDescriptor { -public: - const std::string Pattern; - const std::string Transform; - - PatternRewriteDescriptor(StringRef P, StringRef T) - : RewriteDescriptor(DT), Pattern(P), Transform(T) { } - - bool performOnModule(Module &M) override; - - static bool classof(const RewriteDescriptor *RD) { - return RD->getType() == DT; - } -}; - -} // end anonymous namespace - -template <RewriteDescriptor::Type DT, typename ValueType, - ValueType *(Module::*Get)(StringRef) const, - iterator_range<typename iplist<ValueType>::iterator> - (Module::*Iterator)()> -bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>:: -performOnModule(Module &M) { - bool Changed = false; - for (auto &C : (M.*Iterator)()) { - std::string Error; - - std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error); - if (!Error.empty()) - report_fatal_error("unable to transform " + C.getName() + " in " + - M.getModuleIdentifier() + ": " + Error); - - if (C.getName() == Name) - continue; - - if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) - rewriteComdat(M, GO, C.getName(), Name); - - if (Value *V = (M.*Get)(Name)) - C.setValueName(V->getValueName()); - else - C.setName(Name); - - Changed = true; - } - return Changed; -} - -namespace { - -/// Represents a rewrite for an explicitly named (function) symbol. Both the -/// source function name and target function name of the transformation are -/// explicitly spelt out. -using ExplicitRewriteFunctionDescriptor = - ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function, - &Module::getFunction>; - -/// Represents a rewrite for an explicitly named (global variable) symbol. Both -/// the source variable name and target variable name are spelt out. This -/// applies only to module level variables. -using ExplicitRewriteGlobalVariableDescriptor = - ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, - GlobalVariable, &Module::getGlobalVariable>; - -/// Represents a rewrite for an explicitly named global alias. Both the source -/// and target name are explicitly spelt out. -using ExplicitRewriteNamedAliasDescriptor = - ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, - &Module::getNamedAlias>; - -/// Represents a rewrite for a regular expression based pattern for functions. -/// A pattern for the function name and a transformation that determines the -/// target function name together create the rewrite rule.
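-/// For example (hypothetical), Pattern "^foo_(.*)$" with Transform "bar_\1" -/// rewrites every function whose name starts with foo_ to the matching -/// bar_ name.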
-using PatternRewriteFunctionDescriptor = - PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function, - &Module::getFunction, &Module::functions>; - -/// Represents a rewrite for a global variable based upon a matching pattern. -/// Each global variable matching the provided pattern will be transformed as -/// described in the transformation pattern for the target. Applies only to -/// module level variables. -using PatternRewriteGlobalVariableDescriptor = - PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable, - GlobalVariable, &Module::getGlobalVariable, - &Module::globals>; - -/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global -/// aliases which match a given pattern. The provided transformation will be -/// applied to each of the matching names. -using PatternRewriteNamedAliasDescriptor = - PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias, - &Module::getNamedAlias, &Module::aliases>; - -} // end anonymous namespace - -bool RewriteMapParser::parse(const std::string &MapFile, - RewriteDescriptorList *DL) { - ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping = - MemoryBuffer::getFile(MapFile); - - if (!Mapping) - report_fatal_error("unable to read rewrite map '" + MapFile + "': " + - Mapping.getError().message()); - - if (!parse(*Mapping, DL)) - report_fatal_error("unable to parse rewrite map '" + MapFile + "'"); - - return true; -} - -bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile, - RewriteDescriptorList *DL) { - SourceMgr SM; - yaml::Stream YS(MapFile->getBuffer(), SM); - - for (auto &Document : YS) { - yaml::MappingNode *DescriptorList; - - // ignore empty documents - if (isa<yaml::NullNode>(Document.getRoot())) - continue; - - DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot()); - if (!DescriptorList) { - YS.printError(Document.getRoot(), "DescriptorList node must be a map"); - return false; - } - - for (auto &Descriptor : *DescriptorList) - if (!parseEntry(YS, Descriptor, DL)) - return false; - } - - return true; -} - -bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry, - RewriteDescriptorList *DL) { - yaml::ScalarNode *Key; - yaml::MappingNode *Value; - SmallString<32> KeyStorage; - StringRef RewriteType; - - Key = dyn_cast<yaml::ScalarNode>(Entry.getKey()); - if (!Key) { - YS.printError(Entry.getKey(), "rewrite type must be a scalar"); - return false; - } - - Value = dyn_cast<yaml::MappingNode>(Entry.getValue()); - if (!Value) { - YS.printError(Entry.getValue(), "rewrite descriptor must be a map"); - return false; - } - - RewriteType = Key->getValue(KeyStorage); - if (RewriteType.equals("function")) - return parseRewriteFunctionDescriptor(YS, Key, Value, DL); - else if (RewriteType.equals("global variable")) - return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL); - else if (RewriteType.equals("global alias")) - return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL); - - YS.printError(Entry.getKey(), "unknown rewrite type"); - return false; -} - -bool RewriteMapParser:: -parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, - yaml::MappingNode *Descriptor, - RewriteDescriptorList *DL) { - bool Naked = false; - std::string Source; - std::string Target; - std::string Transform; - - for (auto &Field : *Descriptor) { - yaml::ScalarNode *Key; - yaml::ScalarNode *Value; - SmallString<32> KeyStorage; - SmallString<32> ValueStorage; - StringRef KeyValue; - - Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); - if (!Key) { - 
YS.printError(Field.getKey(), "descriptor key must be a scalar"); - return false; - } - - Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); - if (!Value) { - YS.printError(Field.getValue(), "descriptor value must be a scalar"); - return false; - } - - KeyValue = Key->getValue(KeyStorage); - if (KeyValue.equals("source")) { - std::string Error; - - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } - } else if (KeyValue.equals("target")) { - Target = Value->getValue(ValueStorage); - } else if (KeyValue.equals("transform")) { - Transform = Value->getValue(ValueStorage); - } else if (KeyValue.equals("naked")) { - std::string Undecorated; - - Undecorated = Value->getValue(ValueStorage); - Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1"; - } else { - YS.printError(Field.getKey(), "unknown key for function"); - return false; - } - } - - if (Transform.empty() == Target.empty()) { - YS.printError(Descriptor, - "exactly one of transform or target must be specified"); - return false; - } - - // TODO see if there is a more elegant solution to selecting the rewrite - // descriptor type - if (!Target.empty()) - DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>( - Source, Target, Naked)); - else - DL->push_back( - llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform)); - - return true; -} - -bool RewriteMapParser:: -parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, - yaml::MappingNode *Descriptor, - RewriteDescriptorList *DL) { - std::string Source; - std::string Target; - std::string Transform; - - for (auto &Field : *Descriptor) { - yaml::ScalarNode *Key; - yaml::ScalarNode *Value; - SmallString<32> KeyStorage; - SmallString<32> ValueStorage; - StringRef KeyValue; - - Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); - if (!Key) { - YS.printError(Field.getKey(), "descriptor Key must be a scalar"); - return false; - } - - Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); - if (!Value) { - YS.printError(Field.getValue(), "descriptor value must be a scalar"); - return false; - } - - KeyValue = Key->getValue(KeyStorage); - if (KeyValue.equals("source")) { - std::string Error; - - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } - } else if (KeyValue.equals("target")) { - Target = Value->getValue(ValueStorage); - } else if (KeyValue.equals("transform")) { - Transform = Value->getValue(ValueStorage); - } else { - YS.printError(Field.getKey(), "unknown Key for Global Variable"); - return false; - } - } - - if (Transform.empty() == Target.empty()) { - YS.printError(Descriptor, - "exactly one of transform or target must be specified"); - return false; - } - - if (!Target.empty()) - DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>( - Source, Target, - /*Naked*/ false)); - else - DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>( - Source, Transform)); - - return true; -} - -bool RewriteMapParser:: -parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, - yaml::MappingNode *Descriptor, - RewriteDescriptorList *DL) { - std::string Source; - std::string Target; - std::string Transform; - - for (auto &Field : *Descriptor) { - yaml::ScalarNode *Key; - yaml::ScalarNode *Value; - SmallString<32> KeyStorage; - SmallString<32> ValueStorage; - StringRef 
KeyValue; - - Key = dyn_cast<yaml::ScalarNode>(Field.getKey()); - if (!Key) { - YS.printError(Field.getKey(), "descriptor key must be a scalar"); - return false; - } - - Value = dyn_cast<yaml::ScalarNode>(Field.getValue()); - if (!Value) { - YS.printError(Field.getValue(), "descriptor value must be a scalar"); - return false; - } - - KeyValue = Key->getValue(KeyStorage); - if (KeyValue.equals("source")) { - std::string Error; - - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); - return false; - } - } else if (KeyValue.equals("target")) { - Target = Value->getValue(ValueStorage); - } else if (KeyValue.equals("transform")) { - Transform = Value->getValue(ValueStorage); - } else { - YS.printError(Field.getKey(), "unknown key for Global Alias"); - return false; - } - } - - if (Transform.empty() == Target.empty()) { - YS.printError(Descriptor, - "exactly one of transform or target must be specified"); - return false; - } - - if (!Target.empty()) - DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>( - Source, Target, - /*Naked*/ false)); - else - DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>( - Source, Transform)); - - return true; -} - -namespace { - -class RewriteSymbolsLegacyPass : public ModulePass { -public: - static char ID; // Pass identification, replacement for typeid - - RewriteSymbolsLegacyPass(); - RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL); - - bool runOnModule(Module &M) override; - -private: - RewriteSymbolPass Impl; -}; - -} // end anonymous namespace - -char RewriteSymbolsLegacyPass::ID = 0; - -RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { - initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); -} - -RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( - SymbolRewriter::RewriteDescriptorList &DL) - : ModulePass(ID), Impl(DL) {} - -bool RewriteSymbolsLegacyPass::runOnModule(Module &M) { - return Impl.runImpl(M); -} - -PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) { - if (!runImpl(M)) - return PreservedAnalyses::all(); - - return PreservedAnalyses::none(); -} - -bool RewriteSymbolPass::runImpl(Module &M) { - bool Changed; - - Changed = false; - for (auto &Descriptor : Descriptors) - Changed |= Descriptor->performOnModule(M); - - return Changed; -} - -void RewriteSymbolPass::loadAndParseMapFiles() { - const std::vector<std::string> MapFiles(RewriteMapFiles); - SymbolRewriter::RewriteMapParser Parser; - - for (const auto &MapFile : MapFiles) - Parser.parse(MapFile, &Descriptors); -} - -INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols", - false, false) - -ModulePass *llvm::createRewriteSymbolsPass() { - return new RewriteSymbolsLegacyPass(); -} - -ModulePass * -llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) { - return new RewriteSymbolsLegacyPass(DL); -} diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp deleted file mode 100644 index 7f7bdf8a3d6d..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ /dev/null @@ -1,114 +0,0 @@ -//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass is used to ensure that functions have at most one return -// instruction in them. Additionally, it keeps track of which node is the new -// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode -// method will return a null pointer. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -#include "llvm/Transforms/Utils.h" -using namespace llvm; - -char UnifyFunctionExitNodes::ID = 0; -INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn", - "Unify function exit nodes", false, false) - -Pass *llvm::createUnifyFunctionExitNodesPass() { - return new UnifyFunctionExitNodes(); -} - -void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { - // We preserve the non-critical-edgeness property - AU.addPreservedID(BreakCriticalEdgesID); - // This is a cluster of orthogonal Transforms - AU.addPreservedID(LowerSwitchID); -} - -// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new -// BasicBlock, and converting all returns to unconditional branches to this -// new basic block. The singular exit node is returned. -// -// If there are no return statements in the Function, a null pointer is returned. -// -bool UnifyFunctionExitNodes::runOnFunction(Function &F) { - // Loop over all of the blocks in a function, tracking all of the blocks that - // return. - // - std::vector<BasicBlock*> ReturningBlocks; - std::vector<BasicBlock*> UnreachableBlocks; - for (BasicBlock &I : F) - if (isa<ReturnInst>(I.getTerminator())) - ReturningBlocks.push_back(&I); - else if (isa<UnreachableInst>(I.getTerminator())) - UnreachableBlocks.push_back(&I); - - // Handle unreachable blocks first. - if (UnreachableBlocks.empty()) { - UnreachableBlock = nullptr; - } else if (UnreachableBlocks.size() == 1) { - UnreachableBlock = UnreachableBlocks.front(); - } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), - "UnifiedUnreachableBlock", &F); - new UnreachableInst(F.getContext(), UnreachableBlock); - - for (BasicBlock *BB : UnreachableBlocks) { - BB->getInstList().pop_back(); // Remove the unreachable inst. - BranchInst::Create(UnreachableBlock, BB); - } - } - - // Now handle return blocks. - if (ReturningBlocks.empty()) { - ReturnBlock = nullptr; - return false; // No blocks return - } else if (ReturningBlocks.size() == 1) { - ReturnBlock = ReturningBlocks.front(); // Already has a single return block - return false; - } - - // Otherwise, we need to insert a new basic block into the function, add a PHI - // node (if the function returns values), and convert all of the return - // instructions into unconditional branches. - // - BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), - "UnifiedReturnBlock", &F); - - PHINode *PN = nullptr; - if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); - } else { - // If the function doesn't return void... add a PHI node to the block...
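-    // For illustration (hypothetical): returns such as -    //   bb1: ret i32 %a -    //   bb2: ret i32 %b -    // become branches to UnifiedReturnBlock, which then contains -    //   %UnifiedRetVal = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] -    //   ret i32 %UnifiedRetVal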
- PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), - "UnifiedRetVal"); - NewRetBlock->getInstList().push_back(PN); - ReturnInst::Create(F.getContext(), PN, NewRetBlock); - } - - // Loop over all of the blocks, replacing the return instruction with an - // unconditional branch. - // - for (BasicBlock *BB : ReturningBlocks) { - // Add an incoming element to the PHI node for every return instruction that - // is merging into this new block... - if (PN) - PN->addIncoming(BB->getTerminator()->getOperand(0), BB); - - BB->getInstList().pop_back(); // Remove the return insn - BranchInst::Create(NewRetBlock, BB); - } - ReturnBlock = NewRetBlock; - return true; -} diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp deleted file mode 100644 index 5272ab6e95d5..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the common initialization infrastructure for the -// TransformUtils library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils.h" -#include "llvm-c/Initialization.h" -#include "llvm-c/Transforms/Utils.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -/// initializeTransformUtils - Initialize all passes in the TransformUtils -/// library. -void llvm::initializeTransformUtils(PassRegistry &Registry) { - initializeAddDiscriminatorsLegacyPassPass(Registry); - initializeBreakCriticalEdgesPass(Registry); - initializeCanonicalizeAliasesLegacyPassPass(Registry); - initializeInstNamerPass(Registry); - initializeLCSSAWrapperPassPass(Registry); - initializeLibCallsShrinkWrapLegacyPassPass(Registry); - initializeLoopSimplifyPass(Registry); - initializeLowerInvokeLegacyPassPass(Registry); - initializeLowerSwitchPass(Registry); - initializeNameAnonGlobalLegacyPassPass(Registry); - initializePromoteLegacyPassPass(Registry); - initializeStripNonLineTableDebugInfoPass(Registry); - initializeUnifyFunctionExitNodesPass(Registry); - initializeMetaRenamerPass(Registry); - initializeStripGCRelocatesPass(Registry); - initializePredicateInfoPrinterLegacyPassPass(Registry); -} - -/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. 
-void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { - initializeTransformUtils(*unwrap(R)); -} - -void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLowerSwitchPass()); -} - -void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createPromoteMemoryToRegisterPass()); -} - -void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createAddDiscriminatorsPass()); -} diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp deleted file mode 100644 index a77bf50fe10b..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ /dev/null @@ -1,539 +0,0 @@ -#include "llvm/Transforms/Utils/VNCoercion.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "vncoerce" -namespace llvm { -namespace VNCoercion { - -/// Return true if coerceAvailableValueToLoadType will succeed. -bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, - const DataLayout &DL) { - Type *StoredTy = StoredVal->getType(); - if (StoredTy == LoadTy) - return true; - - // If the loaded or stored value is a first class array or struct, don't try - // to transform them. We need to be able to bitcast to integer. - if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() || - StoredTy->isArrayTy()) - return false; - - uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy); - - // The store size must be byte-aligned to support future type casts. - if (llvm::alignTo(StoreSize, 8) != StoreSize) - return false; - - // The store has to be at least as big as the load. - if (StoreSize < DL.getTypeSizeInBits(LoadTy)) - return false; - - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - // As a special case, allow coercion of memset used to initialize - // an array w/null. Despite non-integral pointers not generally having a - // specific bit pattern, we do assume null is zero. - if (auto *CI = dyn_cast<Constant>(StoredVal)) - return CI->isNullValue(); - return false; - } - - return true; -} - -template <class T, class HelperClass> -static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, - HelperClass &Helper, - const DataLayout &DL) { - assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && - "precondition violation - materialization can't fail"); - if (auto *C = dyn_cast<Constant>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - // If this is already the right type, just return it. - Type *StoredValTy = StoredVal->getType(); - - uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy); - uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy); - - // If the store and reload are the same size, we can always reuse it. - if (StoredValSize == LoadedValSize) { - // Pointer to Pointer -> use bitcast. - if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { - StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); - } else { - // Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->isPtrOrPtrVectorTy()) { - StoredValTy = DL.getIntPtrType(StoredValTy); - StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); - } - - Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->isPtrOrPtrVectorTy()) - TypeToCastTo = DL.getIntPtrType(TypeToCastTo); - - if (StoredValTy != TypeToCastTo) - StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); - - // Cast to pointer if the load needs a pointer type. - if (LoadedTy->isPtrOrPtrVectorTy()) - StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); - } - - if (auto *C = dyn_cast<ConstantExpr>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - return StoredVal; - } - // If the loaded value is smaller than the available value, then we can - // extract out a piece from it. If the available value is too small, then we - // can't do anything. - assert(StoredValSize >= LoadedValSize && - "canCoerceMustAliasedValueToLoad fail"); - - // Convert source pointers to integers, which can be manipulated. - if (StoredValTy->isPtrOrPtrVectorTy()) { - StoredValTy = DL.getIntPtrType(StoredValTy); - StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); - } - - // Convert vectors and fp to integer, which can be manipulated. - if (!StoredValTy->isIntegerTy()) { - StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); - StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); - } - - // If this is a big-endian system, we need to shift the value down to the low - // bits so that a truncate will work. - if (DL.isBigEndian()) { - uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) - - DL.getTypeStoreSizeInBits(LoadedTy); - StoredVal = Helper.CreateLShr( - StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); - } - - // Truncate the integer to the right size now. - Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); - StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy); - - if (LoadedTy != NewIntTy) { - // If the result is a pointer, inttoptr. - if (LoadedTy->isPtrOrPtrVectorTy()) - StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); - else - // Otherwise, bitcast. - StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); - } - - if (auto *C = dyn_cast<Constant>(StoredVal)) - if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) - StoredVal = FoldedStoredVal; - - return StoredVal; -} - -/// If we saw a store of a value to memory, and -/// then a load from a must-aliased pointer of a different type, try to coerce -/// the stored value. LoadedTy is the type of the load we want to replace. -/// IRB is IRBuilder used to insert new instructions. -/// -/// If we can't do it, return null. -Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, - IRBuilder<> &IRB, const DataLayout &DL) { - return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL); -} - -/// This function is called when we have a memdep query of a load that ends up -/// being a clobbering memory write (store, memset, memcpy, memmove). This -/// means that the write *may* provide bits used by the load but we can't be -/// sure because the pointers don't must-alias. -/// -/// Check this case to see if there is anything more we can do before we give -/// up. This returns -1 if we have to give up, or a byte number in the stored -/// value of the piece that feeds the load. 
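-/// For example (hypothetical): given a clobbering store i32 %v to %p and a -/// load i8 from (%p + 2), the load is fed entirely by byte 2 of the stored -/// value, so this returns 2.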
-static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, - Value *WritePtr, - uint64_t WriteSizeInBits, - const DataLayout &DL) { - // If the loaded or stored value is a first class array or struct, don't try - // to transform them. We need to be able to bitcast to integer. - if (LoadTy->isStructTy() || LoadTy->isArrayTy()) - return -1; - - int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = - GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); - if (StoreBase != LoadBase) - return -1; - - // If the load and store are to the exact same address, they should have been - // a must alias. AA must have gotten confused. - // FIXME: Study to see if/when this happens. One case is forwarding a memset - // to a load from the base of the memset. - - // If the load and store don't overlap at all, the store doesn't provide - // anything to the load. In this case, they really don't alias at all, AA - // must have gotten confused. - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); - - if ((WriteSizeInBits & 7) | (LoadSize & 7)) - return -1; - uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. - LoadSize /= 8; - - bool isAAFailure = false; - if (StoreOffset < LoadOffset) - isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; - else - isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; - - if (isAAFailure) - return -1; - - // If the Load isn't completely contained within the stored bits, we don't - // have all the bits to feed it. We could do something crazy in the future - // (issue a smaller load then merge the bits in) but this seems unlikely to be - // valuable. - if (StoreOffset > LoadOffset || - StoreOffset + StoreSize < LoadOffset + LoadSize) - return -1; - - // Okay, we can do this transformation. Return the number of bytes into the - // store that the load is. - return LoadOffset - StoreOffset; -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. -int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, - StoreInst *DepSI, const DataLayout &DL) { - auto *StoredVal = DepSI->getValueOperand(); - - // Cannot handle reading from store of first-class aggregate yet. - if (StoredVal->getType()->isStructTy() || - StoredVal->getType()->isArrayTy()) - return -1; - - // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - // Allow casts of zero values to null as a special case - auto *CI = dyn_cast<Constant>(StoredVal); - if (!CI || !CI->isNullValue()) - return -1; - } - - Value *StorePtr = DepSI->getPointerOperand(); - uint64_t StoreSize = - DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); - return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, - DL); -} - -/// This function is called when we have a -/// memdep query of a load that ends up being clobbered by another load. See if -/// the other load can feed into the second load. -int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, - const DataLayout &DL) { - // Cannot handle reading from store of first-class aggregate yet. - if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) - return -1; - - // Don't coerce non-integral pointers to integers or vice versa. 
- if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) != - DL.isNonIntegralPointerType(LoadTy->getScalarType())) - return -1; - - Value *DepPtr = DepLI->getPointerOperand(); - uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); - int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); - if (R != -1) - return R; - - // If we have a load/load clobber and DepLI can be widened to cover this load, - // then we should widen it! - int64_t LoadOffs = 0; - const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); - unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - - unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( - LoadBase, LoadOffs, LoadSize, DepLI); - if (Size == 0) - return -1; - - // Check non-obvious conditions enforced by MDA which we rely on for being - // able to materialize this potentially available value - assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); - assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); - - return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL); -} - -int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, - MemIntrinsic *MI, const DataLayout &DL) { - // If the mem operation has a non-constant size, we can't handle it. - ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); - if (!SizeCst) - return -1; - uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8; - - // If this is memset, we just need to see if the offset is valid in the size - // of the memset. - if (MI->getIntrinsicID() == Intrinsic::memset) { - if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue()); - if (!CI || !CI->isZero()) - return -1; - } - return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), - MemSizeInBits, DL); - } - - // If we have a memcpy/memmove, the only case we can handle is if this is a - // copy from constant memory. In that case, we can read directly from the - // constant memory. - MemTransferInst *MTI = cast<MemTransferInst>(MI); - - Constant *Src = dyn_cast<Constant>(MTI->getSource()); - if (!Src) - return -1; - - GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); - if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) - return -1; - - // See if the access is within the bounds of the transfer. - int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), - MemSizeInBits, DL); - if (Offset == -1) - return Offset; - - // Don't coerce non-integral pointers to integers or vice versa, and the - // memtransfer is implicitly a raw byte copy - if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) - // TODO: Can allow nullptrs from constant zeros - return -1; - - unsigned AS = Src->getType()->getPointerAddressSpace(); - // Otherwise, see if we can constant fold a load from the constant with the - // offset applied as appropriate.
- Src = - ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) - return Offset; - return -1; -} - -template <class T, class HelperClass> -static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, - HelperClass &Helper, - const DataLayout &DL) { - LLVMContext &Ctx = SrcVal->getType()->getContext(); - - // If two pointers are in the same address space, they have the same size, - // so we don't need to do any truncation, etc. This avoids introducing - // ptrtoint instructions for pointers that may be non-integral. - if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() && - cast<PointerType>(SrcVal->getType())->getAddressSpace() == - cast<PointerType>(LoadTy)->getAddressSpace()) { - return SrcVal; - } - - uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; - uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; - // Compute which bits of the stored value are being used by the load. Convert - // to an integer type to start with. - if (SrcVal->getType()->isPtrOrPtrVectorTy()) - SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); - if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); - - // Shift the bits to the least significant depending on endianness. - unsigned ShiftAmt; - if (DL.isLittleEndian()) - ShiftAmt = Offset * 8; - else - ShiftAmt = (StoreSize - LoadSize - Offset) * 8; - if (ShiftAmt) - SrcVal = Helper.CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt)); - - if (LoadSize != StoreSize) - SrcVal = Helper.CreateTruncOrBitCast(SrcVal, - IntegerType::get(Ctx, LoadSize * 8)); - return SrcVal; -} - -/// This function is called when we have a memdep query of a load that ends up -/// being a clobbering store. This means that the store provides bits used by -/// the load but the pointers don't must-alias. Check this case to see if -/// there is anything more we can do before we give up. -Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL) { - - IRBuilder<> Builder(InsertPt); - SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); - return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL); -} - -Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset, - Type *LoadTy, const DataLayout &DL) { - ConstantFolder F; - SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL); - return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL); -} - -/// This function is called when we have a memdep query of a load that ends up -/// being a clobbering load. This means that the load *may* provide bits used -/// by the load but we can't be sure because the pointers don't must-alias. -/// Check this case to see if there is anything more we can do before we give -/// up. -Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL) { - // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to - // widen SrcVal out to a larger load. 
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); - unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - if (Offset + LoadSize > SrcValStoreSize) { - assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); - assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); - // If we have a load/load clobber and DepLI can be widened to cover this - // load, then we should widen it to the next power-of-2 size that is big enough! - unsigned NewLoadSize = Offset + LoadSize; - if (!isPowerOf2_32(NewLoadSize)) - NewLoadSize = NextPowerOf2(NewLoadSize); - - Value *PtrVal = SrcVal->getPointerOperand(); - // Insert the new load after the old load. This ensures that subsequent - // memdep queries will find the new load. We can't easily remove the old - // load completely because it is already in the value numbering table. - IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); - Type *DestPTy = - PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace()); - Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); - PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); - LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal); - NewLoad->takeName(SrcVal); - NewLoad->setAlignment(SrcVal->getAlignment()); - - LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); - LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); - - // Replace uses of the original load with the wider load. On a big endian - // system, we need to shift down to get the relevant bits. - Value *RV = NewLoad; - if (DL.isBigEndian()) - RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); - RV = Builder.CreateTrunc(RV, SrcVal->getType()); - SrcVal->replaceAllUsesWith(RV); - - SrcVal = NewLoad; - } - - return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); -} - -Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset, - Type *LoadTy, const DataLayout &DL) { - unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); - unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - if (Offset + LoadSize > SrcValStoreSize) - return nullptr; - return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL); -} - -template <class T, class HelperClass> -T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, HelperClass &Helper, - const DataLayout &DL) { - LLVMContext &Ctx = LoadTy->getContext(); - uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8; - - // We know that this method is only called when the mem transfer fully - // provides the bits for the load. - if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { - // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and - // independently of what the offset is. - T *Val = cast<T>(MSI->getValue()); - if (LoadSize != 1) - Val = - Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); - T *OneElt = Val; - - // Splat the value out to the right number of bits. - for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { - // If we can double the number of bytes set, do it. - if (NumBytesSet * 2 <= LoadSize) { - T *ShVal = Helper.CreateShl( - Val, ConstantInt::get(Val->getType(), NumBytesSet * 8)); - Val = Helper.CreateOr(Val, ShVal); - NumBytesSet <<= 1; - continue; - } - - // Otherwise insert one byte at a time.
- T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); - Val = Helper.CreateOr(OneElt, ShVal); - ++NumBytesSet; - } - - return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL); - } - - // Otherwise, this is a memcpy/memmove from a constant global. - MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); - Constant *Src = cast<Constant>(MTI->getSource()); - unsigned AS = Src->getType()->getPointerAddressSpace(); - - // Otherwise, see if we can constant fold a load from the constant with the - // offset applied as appropriate. - Src = - ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); - Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, - OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); -} - -/// This function is called when we have a -/// memdep query of a load that ends up being a clobbering mem intrinsic. -Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, Instruction *InsertPt, - const DataLayout &DL) { - IRBuilder<> Builder(InsertPt); - return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset, - LoadTy, Builder, DL); -} - -Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - Type *LoadTy, const DataLayout &DL) { - // The only case where the result of analyzeLoadFromClobberingMemInst cannot - // be converted to a constant is when it's a memset of a non-constant value. - if (auto *MSI = dyn_cast<MemSetInst>(SrcInst)) - if (!isa<Constant>(MSI->getValue())) - return nullptr; - ConstantFolder F; - return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset, - LoadTy, F, DL); -} -} // namespace VNCoercion -} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp deleted file mode 100644 index fbc3407c301f..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ /dev/null @@ -1,1157 +0,0 @@ -//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the MapValue function, which is shared by various parts of -// the lib/Transforms/Utils library.
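-// -// A typical use (hypothetical sketch): after cloning a function body into -// NewF with the old-to-new mapping recorded in VMap, remap each cloned -// instruction so its operands refer to the clones: -// -//   ValueToValueMapTy VMap;  // filled in while cloning -//   for (Instruction &I : instructions(NewF)) -//     RemapInstruction(&I, VMap, RF_IgnoreMissingLocals); -//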
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include <cassert> -#include <limits> -#include <memory> -#include <utility> - -using namespace llvm; - -// Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::anchor() {} -void ValueMaterializer::anchor() {} - -namespace { - -/// A basic block used in a BlockAddress whose function body is not yet -/// materialized. -struct DelayedBasicBlock { - BasicBlock *OldBB; - std::unique_ptr<BasicBlock> TempBB; - - DelayedBasicBlock(const BlockAddress &Old) - : OldBB(Old.getBasicBlock()), - TempBB(BasicBlock::Create(Old.getContext())) {} -}; - -struct WorklistEntry { - enum EntryKind { - MapGlobalInit, - MapAppendingVar, - MapGlobalAliasee, - RemapFunction - }; - struct GVInitTy { - GlobalVariable *GV; - Constant *Init; - }; - struct AppendingGVTy { - GlobalVariable *GV; - Constant *InitPrefix; - }; - struct GlobalAliaseeTy { - GlobalAlias *GA; - Constant *Aliasee; - }; - - unsigned Kind : 2; - unsigned MCID : 29; - unsigned AppendingGVIsOldCtorDtor : 1; - unsigned AppendingGVNumNewMembers; - union { - GVInitTy GVInit; - AppendingGVTy AppendingGV; - GlobalAliaseeTy GlobalAliasee; - Function *RemapF; - } Data; -}; - -struct MappingContext { - ValueToValueMapTy *VM; - ValueMaterializer *Materializer = nullptr; - - /// Construct a MappingContext with a value map and materializer. - explicit MappingContext(ValueToValueMapTy &VM, - ValueMaterializer *Materializer = nullptr) - : VM(&VM), Materializer(Materializer) {} -}; - -class Mapper { - friend class MDNodeMapper; - -#ifndef NDEBUG - DenseSet<GlobalValue *> AlreadyScheduled; -#endif - - RemapFlags Flags; - ValueMapTypeRemapper *TypeMapper; - unsigned CurrentMCID = 0; - SmallVector<MappingContext, 2> MCs; - SmallVector<WorklistEntry, 4> Worklist; - SmallVector<DelayedBasicBlock, 1> DelayedBBs; - SmallVector<Constant *, 16> AppendingInits; - -public: - Mapper(ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) - : Flags(Flags), TypeMapper(TypeMapper), - MCs(1, MappingContext(VM, Materializer)) {} - - /// ValueMapper should explicitly call \a flush() before destruction. 
- ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); } - - bool hasWorkToDo() const { return !Worklist.empty(); } - - unsigned - registerAlternateMappingContext(ValueToValueMapTy &VM, - ValueMaterializer *Materializer = nullptr) { - MCs.push_back(MappingContext(VM, Materializer)); - return MCs.size() - 1; - } - - void addFlags(RemapFlags Flags); - - void remapGlobalObjectMetadata(GlobalObject &GO); - - Value *mapValue(const Value *V); - void remapInstruction(Instruction *I); - void remapFunction(Function &F); - - Constant *mapConstant(const Constant *C) { - return cast_or_null<Constant>(mapValue(C)); - } - - /// Map metadata. - /// - /// Find the mapping for MD. Guarantees that the return will be resolved - /// (not an MDNode, or MDNode::isResolved() returns true). - Metadata *mapMetadata(const Metadata *MD); - - void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, - unsigned MCID); - void scheduleMapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, - bool IsOldCtorDtor, - ArrayRef<Constant *> NewMembers, - unsigned MCID); - void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID); - void scheduleRemapFunction(Function &F, unsigned MCID); - - void flush(); - -private: - void mapGlobalInitializer(GlobalVariable &GV, Constant &Init); - void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, - bool IsOldCtorDtor, - ArrayRef<Constant *> NewMembers); - void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee); - void remapFunction(Function &F, ValueToValueMapTy &VM); - - ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } - ValueMaterializer *getMaterializer() { return MCs[CurrentMCID].Materializer; } - - Value *mapBlockAddress(const BlockAddress &BA); - - /// Map metadata that doesn't require visiting operands. - Optional<Metadata *> mapSimpleMetadata(const Metadata *MD); - - Metadata *mapToMetadata(const Metadata *Key, Metadata *Val); - Metadata *mapToSelf(const Metadata *MD); -}; - -class MDNodeMapper { - Mapper &M; - - /// Data about a node in \a UniquedGraph. - struct Data { - bool HasChanged = false; - unsigned ID = std::numeric_limits<unsigned>::max(); - TempMDNode Placeholder; - }; - - /// A graph of uniqued nodes. - struct UniquedGraph { - SmallDenseMap<const Metadata *, Data, 32> Info; // Node properties. - SmallVector<MDNode *, 16> POT; // Post-order traversal. - - /// Propagate changed operands through the post-order traversal. - /// - /// Iteratively update \a Data::HasChanged for each node based on \a - /// Data::HasChanged of its operands, until fixed point. - void propagateChanges(); - - /// Get a forward reference to a node to use as an operand. - Metadata &getFwdReference(MDNode &Op); - }; - - /// Worklist of distinct nodes whose operands need to be remapped. - SmallVector<MDNode *, 16> DistinctWorklist; - - // Storage for a UniquedGraph. - SmallDenseMap<const Metadata *, Data, 32> InfoStorage; - SmallVector<MDNode *, 16> POTStorage; - -public: - MDNodeMapper(Mapper &M) : M(M) {} - - /// Map a metadata node (and its transitive operands). - /// - /// Map all the (unmapped) nodes in the subgraph under \c N. The iterative - /// algorithm handles distinct nodes and uniqued node subgraphs using - /// different strategies. - /// - /// Distinct nodes are immediately mapped and added to \a DistinctWorklist - /// using \a mapDistinctNode(). Their mapping can always be computed - /// immediately without visiting operands, even if their operands change. 
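A minimal standalone sketch of the distinct-node strategy just described, with stand-in Node/Remapper types rather than LLVM's: clone a distinct node the moment it is first seen, memoize it before visiting operands, and defer the operand rewrite to a worklist so cycles through distinct nodes terminate naturally:

#include <map>
#include <memory>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
};

struct Remapper {
  std::map<const Node *, Node *> VM;           // memoized mappings
  std::vector<Node *> Worklist;                // clones needing operand rewrites
  std::vector<std::unique_ptr<Node>> Owned;    // storage for the clones

  Node *mapDistinct(Node *N) {
    auto It = VM.find(N);
    if (It != VM.end())
      return It->second;                       // already mapped
    Owned.push_back(std::unique_ptr<Node>(new Node(*N))); // clone, old operands
    Node *Clone = Owned.back().get();
    VM[N] = Clone;                             // memoize *before* recursing
    Worklist.push_back(Clone);                 // defer the operand rewrite
    return Clone;
  }

  void flush() {
    while (!Worklist.empty()) {
      Node *Clone = Worklist.back();
      Worklist.pop_back();
      for (Node *&Op : Clone->Ops)
        Op = mapDistinct(Op);                  // may push more work
    }
  }
};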
- /// - /// The mapping for uniqued nodes depends on whether their operands change. - /// \a mapTopLevelUniquedNode() traverses the transitive uniqued subgraph of - /// a node to calculate uniqued node mappings in bulk. Distinct leaves are - /// added to \a DistinctWorklist with \a mapDistinctNode(). - /// - /// After mapping \c N itself, this function remaps the operands of the - /// distinct nodes in \a DistinctWorklist until the entire subgraph under \c - /// N has been mapped. - Metadata *map(const MDNode &N); - -private: - /// Map a top-level uniqued node and the uniqued subgraph underneath it. - /// - /// This builds up a post-order traversal of the (unmapped) uniqued subgraph - /// underneath \c FirstN and calculates the nodes' mapping. Each node uses - /// the identity mapping (\a Mapper::mapToSelf()) as long as all of its - /// operands use the identity mapping. - /// - /// The algorithm works as follows: - /// - /// 1. \a createPOT(): traverse the uniqued subgraph under \c FirstN and - /// save the post-order traversal in the given \a UniquedGraph, tracking - /// whether nodes' operands change. - /// - /// 2. \a UniquedGraph::propagateChanges(): propagate changed operands - /// through the \a UniquedGraph until fixed point, following the rule - /// that if a node changes, any node that references it must also change. - /// - /// 3. \a mapNodesInPOT(): map the uniqued nodes, creating new uniqued nodes - /// (referencing new operands) where necessary. - Metadata *mapTopLevelUniquedNode(const MDNode &FirstN); - - /// Try to map the operand of an \a MDNode. - /// - /// If \c Op is already mapped, return the mapping. If it's not an \a - /// MDNode, compute and return the mapping. If it's a distinct \a MDNode, - /// return the result of \a mapDistinctNode(). - /// - /// \return None if \c Op is an unmapped uniqued \a MDNode. - /// \post getMappedOp(Op) only returns None if this returns None. - Optional<Metadata *> tryToMapOperand(const Metadata *Op); - - /// Map a distinct node. - /// - /// Return the mapping for the distinct node \c N, saving the result in \a - /// DistinctWorklist for later remapping. - /// - /// \pre \c N is not yet mapped. - /// \pre \c N.isDistinct(). - MDNode *mapDistinctNode(const MDNode &N); - - /// Get a previously mapped node. - Optional<Metadata *> getMappedOp(const Metadata *Op) const; - - /// Create a post-order traversal of an unmapped uniqued node subgraph. - /// - /// This traverses the metadata graph deeply enough to map \c FirstN. It - /// uses \a tryToMapOperand() (via \a Mapper::mapSimpleMetadata()), so any - /// metadata that has already been mapped will not be part of the POT. - /// - /// Each node that has a changed operand from outside the graph (e.g., a - /// distinct node, an already-mapped uniqued node, or \a ConstantAsMetadata) - /// is marked with \a Data::HasChanged. - /// - /// \return \c true if any nodes in \c G have \a Data::HasChanged. - /// \post \c G.POT is a post-order traversal ending with \c FirstN. - /// \post \a Data::HasChanged in \c G.Info indicates whether any node needs - /// to change because of operands outside the graph. - bool createPOT(UniquedGraph &G, const MDNode &FirstN); - - /// Visit the operands of a uniqued node in the POT. - /// - /// Visit the operands in the range from \c I to \c E, returning the first - /// uniqued node we find that isn't yet in \c G. \c I is always advanced to - /// the point where the loop through the operands should continue. - /// - /// This sets \c HasChanged if any of the visited operands change.
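The traversal these comments describe can be sketched without LLVM types: each worklist frame pairs a node with a cursor into its operands, so the walk can pause at an unvisited operand, descend, and later resume mid-operand-list (the same shape as POTWorklistEntry and createPOT below; Node/Frame are stand-ins):

#include <cstddef>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
};

static std::vector<Node *> postOrder(Node *Root) {
  struct Frame {
    Node *N;
    std::size_t Op; // next operand to visit
  };
  std::vector<Node *> POT;
  std::set<Node *> Seen;
  std::vector<Frame> Worklist;
  Seen.insert(Root);
  Worklist.push_back(Frame{Root, 0});
  while (!Worklist.empty()) {
    Frame &F = Worklist.back();
    if (F.Op != F.N->Ops.size()) {
      Node *Op = F.N->Ops[F.Op++];          // advance even if we descend
      if (Seen.insert(Op).second)
        Worklist.push_back(Frame{Op, 0});   // visit the new operand first
      continue;
    }
    POT.push_back(F.N);                     // all operands done: emit node
    Worklist.pop_back();
  }
  return POT;
}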
- MDNode *visitOperands(UniquedGraph &G, MDNode::op_iterator &I, - MDNode::op_iterator E, bool &HasChanged); - - /// Map all the nodes in the given uniqued graph. - /// - /// This visits all the nodes in \c G in post-order, using the identity - /// mapping or creating a new node depending on \a Data::HasChanged. - /// - /// \pre \a getMappedOp() returns None for nodes in \c G, but not for any of - /// their operands outside of \c G. - /// \pre \a Data::HasChanged is true for a node in \c G iff any of its - /// operands have changed. - /// \post \a getMappedOp() returns the mapped node for every node in \c G. - void mapNodesInPOT(UniquedGraph &G); - - /// Remap a node's operands using the given functor. - /// - /// Iterate through the operands of \c N and update them in place using \c - /// mapOperand. - /// - /// \pre N.isDistinct() or N.isTemporary(). - template <class OperandMapper> - void remapOperands(MDNode &N, OperandMapper mapOperand); -}; - -} // end anonymous namespace - -Value *Mapper::mapValue(const Value *V) { - ValueToValueMapTy::iterator I = getVM().find(V); - - // If the value already exists in the map, use it. - if (I != getVM().end()) { - assert(I->second && "Unexpected null mapping"); - return I->second; - } - - // If we have a materializer and it can materialize a value, use that. - if (auto *Materializer = getMaterializer()) { - if (Value *NewV = Materializer->materialize(const_cast<Value *>(V))) { - getVM()[V] = NewV; - return NewV; - } - } - - // Global values do not need to be seeded into the VM if they - // are using the identity mapping. - if (isa<GlobalValue>(V)) { - if (Flags & RF_NullMapMissingGlobalValues) - return nullptr; - return getVM()[V] = const_cast<Value *>(V); - } - - if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { - // Inline asm may need *type* remapping. - FunctionType *NewTy = IA->getFunctionType(); - if (TypeMapper) { - NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); - - if (NewTy != IA->getFunctionType()) - V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), - IA->hasSideEffects(), IA->isAlignStack()); - } - - return getVM()[V] = const_cast<Value *>(V); - } - - if (const auto *MDV = dyn_cast<MetadataAsValue>(V)) { - const Metadata *MD = MDV->getMetadata(); - - if (auto *LAM = dyn_cast<LocalAsMetadata>(MD)) { - // Look through to grab the local value. - if (Value *LV = mapValue(LAM->getValue())) { - if (V == LAM->getValue()) - return const_cast<Value *>(V); - return MetadataAsValue::get(V->getContext(), ValueAsMetadata::get(LV)); - } - - // FIXME: always return nullptr once Verifier::verifyDominatesUse() - // ensures metadata operands only reference defined SSA values. - return (Flags & RF_IgnoreMissingLocals) - ? nullptr - : MetadataAsValue::get(V->getContext(), - MDTuple::get(V->getContext(), None)); - } - - // If this is a module-level metadata and we know that nothing at the module - // level is changing, then use an identity mapping. - if (Flags & RF_NoModuleLevelChanges) - return getVM()[V] = const_cast<Value *>(V); - - // Map the metadata and turn it into a value. - auto *MappedMD = mapMetadata(MD); - if (MD == MappedMD) - return getVM()[V] = const_cast<Value *>(V); - return getVM()[V] = MetadataAsValue::get(V->getContext(), MappedMD); - } - - // Okay, this either must be a constant (which may or may not be mappable) or - // is something that is not in the mapping table. 
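The front half of Mapper::mapValue above follows a fixed lookup order. A standalone sketch of that order with a stand-in Value type (not llvm::Value): memoized mapping first, then the materializer hook, then identity for values allowed to map to themselves:

#include <functional>
#include <map>

struct Value {};

Value *mapValue(Value *V, std::map<Value *, Value *> &VM,
                const std::function<Value *(Value *)> &Materialize,
                bool IdentityOK) {
  auto It = VM.find(V);
  if (It != VM.end())
    return It->second;      // 1. memoized mapping wins
  if (Materialize)
    if (Value *NewV = Materialize(V))
      return VM[V] = NewV;  // 2. materializer hook
  if (IdentityOK)
    return VM[V] = V;       // 3. identity fallback (e.g. global values)
  return nullptr;           // 4. unmapped; the caller decides what that means
}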
- Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); - if (!C) - return nullptr; - - if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) - return mapBlockAddress(*BA); - - auto mapValueOrNull = [this](Value *V) { - auto Mapped = mapValue(V); - assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) && - "Unexpected null mapping for constant operand without " - "NullMapMissingGlobalValues flag"); - return Mapped; - }; - - // Otherwise, we have some other constant to remap. Start by checking to see - // if all operands have an identity remapping. - unsigned OpNo = 0, NumOperands = C->getNumOperands(); - Value *Mapped = nullptr; - for (; OpNo != NumOperands; ++OpNo) { - Value *Op = C->getOperand(OpNo); - Mapped = mapValueOrNull(Op); - if (!Mapped) - return nullptr; - if (Mapped != Op) - break; - } - - // See if the type mapper wants to remap the type as well. - Type *NewTy = C->getType(); - if (TypeMapper) - NewTy = TypeMapper->remapType(NewTy); - - // If the result type and all operands match up, then just insert an identity - // mapping. - if (OpNo == NumOperands && NewTy == C->getType()) - return getVM()[V] = C; - - // Okay, we need to create a new constant. We've already processed some or - // all of the operands, set them all up now. - SmallVector<Constant*, 8> Ops; - Ops.reserve(NumOperands); - for (unsigned j = 0; j != OpNo; ++j) - Ops.push_back(cast<Constant>(C->getOperand(j))); - - // If one of the operands mismatch, push it and the other mapped operands. - if (OpNo != NumOperands) { - Ops.push_back(cast<Constant>(Mapped)); - - // Map the rest of the operands that aren't processed yet. - for (++OpNo; OpNo != NumOperands; ++OpNo) { - Mapped = mapValueOrNull(C->getOperand(OpNo)); - if (!Mapped) - return nullptr; - Ops.push_back(cast<Constant>(Mapped)); - } - } - Type *NewSrcTy = nullptr; - if (TypeMapper) - if (auto *GEPO = dyn_cast<GEPOperator>(C)) - NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - return getVM()[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); - if (isa<ConstantArray>(C)) - return getVM()[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); - if (isa<ConstantStruct>(C)) - return getVM()[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); - if (isa<ConstantVector>(C)) - return getVM()[V] = ConstantVector::get(Ops); - // If this is a no-operand constant, it must be because the type was remapped. - if (isa<UndefValue>(C)) - return getVM()[V] = UndefValue::get(NewTy); - if (isa<ConstantAggregateZero>(C)) - return getVM()[V] = ConstantAggregateZero::get(NewTy); - assert(isa<ConstantPointerNull>(C)); - return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); -} - -Value *Mapper::mapBlockAddress(const BlockAddress &BA) { - Function *F = cast<Function>(mapValue(BA.getFunction())); - - // F may not have materialized its initializer. In that case, create a - // dummy basic block for now, and replace it once we've materialized all - // the initializers. - BasicBlock *BB; - if (F->empty()) { - DelayedBBs.push_back(DelayedBasicBlock(BA)); - BB = DelayedBBs.back().TempBB.get(); - } else { - BB = cast_or_null<BasicBlock>(mapValue(BA.getBasicBlock())); - } - - return getVM()[&BA] = BlockAddress::get(F, BB ? 
BB : BA.getBasicBlock()); -} - -Metadata *Mapper::mapToMetadata(const Metadata *Key, Metadata *Val) { - getVM().MD()[Key].reset(Val); - return Val; -} - -Metadata *Mapper::mapToSelf(const Metadata *MD) { - return mapToMetadata(MD, const_cast<Metadata *>(MD)); -} - -Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { - if (!Op) - return nullptr; - - if (Optional<Metadata *> MappedOp = M.mapSimpleMetadata(Op)) { -#ifndef NDEBUG - if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) - assert((!*MappedOp || M.getVM().count(CMD->getValue()) || - M.getVM().getMappedMD(Op)) && - "Expected Value to be memoized"); - else - assert((isa<MDString>(Op) || M.getVM().getMappedMD(Op)) && - "Expected result to be memoized"); -#endif - return *MappedOp; - } - - const MDNode &N = *cast<MDNode>(Op); - if (N.isDistinct()) - return mapDistinctNode(N); - return None; -} - -static Metadata *cloneOrBuildODR(const MDNode &N) { - auto *CT = dyn_cast<DICompositeType>(&N); - // If ODR type uniquing is enabled, we would have uniqued composite types - // with identifiers during bitcode reading, so we can just use CT. - if (CT && CT->getContext().isODRUniquingDebugTypes() && - CT->getIdentifier() != "") - return const_cast<DICompositeType *>(CT); - return MDNode::replaceWithDistinct(N.clone()); -} - -MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { - assert(N.isDistinct() && "Expected a distinct node"); - assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); - DistinctWorklist.push_back( - cast<MDNode>((M.Flags & RF_MoveDistinctMDs) - ? M.mapToSelf(&N) - : M.mapToMetadata(&N, cloneOrBuildODR(N)))); - return DistinctWorklist.back(); -} - -static ConstantAsMetadata *wrapConstantAsMetadata(const ConstantAsMetadata &CMD, - Value *MappedV) { - if (CMD.getValue() == MappedV) - return const_cast<ConstantAsMetadata *>(&CMD); - return MappedV ? ConstantAsMetadata::getConstant(MappedV) : nullptr; -} - -Optional<Metadata *> MDNodeMapper::getMappedOp(const Metadata *Op) const { - if (!Op) - return nullptr; - - if (Optional<Metadata *> MappedOp = M.getVM().getMappedMD(Op)) - return *MappedOp; - - if (isa<MDString>(Op)) - return const_cast<Metadata *>(Op); - - if (auto *CMD = dyn_cast<ConstantAsMetadata>(Op)) - return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue())); - - return None; -} - -Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) { - auto Where = Info.find(&Op); - assert(Where != Info.end() && "Expected a valid reference"); - - auto &OpD = Where->second; - if (!OpD.HasChanged) - return Op; - - // Lazily construct a temporary node. - if (!OpD.Placeholder) - OpD.Placeholder = Op.clone(); - - return *OpD.Placeholder; -} - -template <class OperandMapper> -void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { - assert(!N.isUniqued() && "Expected distinct or temporary nodes"); - for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) { - Metadata *Old = N.getOperand(I); - Metadata *New = mapOperand(Old); - - if (Old != New) - N.replaceOperandWith(I, New); - } -} - -namespace { - -/// An entry in the worklist for the post-order traversal. -struct POTWorklistEntry { - MDNode *N; ///< Current node. - MDNode::op_iterator Op; ///< Current operand of \c N. - - /// Keep a flag of whether operands have changed in the worklist to avoid - /// hitting the map in \a UniquedGraph. 
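UniquedGraph::getFwdReference above creates placeholders lazily. A standalone sketch of the pattern (Node/Info are stand-ins): an unchanged operand is used as-is, while a changed operand that has not been rebuilt yet is represented by a temporary node created on first use:

#include <map>
#include <memory>

struct Node {};

struct Info {
  bool HasChanged = false;
  std::unique_ptr<Node> Placeholder; // temporary forward reference
};

Node &getFwdReference(std::map<Node *, Info> &G, Node &Op) {
  Info &D = G.at(&Op);
  if (!D.HasChanged)
    return Op;                        // unchanged: identity mapping
  if (!D.Placeholder)
    D.Placeholder.reset(new Node());  // lazily build the temp node
  return *D.Placeholder;
}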
- bool HasChanged = false; - - POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {} -}; - -} // end anonymous namespace - -bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) { - assert(G.Info.empty() && "Expected a fresh traversal"); - assert(FirstN.isUniqued() && "Expected uniqued node in POT"); - - // Construct a post-order traversal of the uniqued subgraph under FirstN. - bool AnyChanges = false; - SmallVector<POTWorklistEntry, 16> Worklist; - Worklist.push_back(POTWorklistEntry(const_cast<MDNode &>(FirstN))); - (void)G.Info[&FirstN]; - while (!Worklist.empty()) { - // Start or continue the traversal through this node's operands. - auto &WE = Worklist.back(); - if (MDNode *N = visitOperands(G, WE.Op, WE.N->op_end(), WE.HasChanged)) { - // Push a new node to traverse first. - Worklist.push_back(POTWorklistEntry(*N)); - continue; - } - - // Push the node onto the POT. - assert(WE.N->isUniqued() && "Expected only uniqued nodes"); - assert(WE.Op == WE.N->op_end() && "Expected to visit all operands"); - auto &D = G.Info[WE.N]; - AnyChanges |= D.HasChanged = WE.HasChanged; - D.ID = G.POT.size(); - G.POT.push_back(WE.N); - - // Pop the node off the worklist. - Worklist.pop_back(); - } - return AnyChanges; -} - -MDNode *MDNodeMapper::visitOperands(UniquedGraph &G, MDNode::op_iterator &I, - MDNode::op_iterator E, bool &HasChanged) { - while (I != E) { - Metadata *Op = *I++; // Increment even on early return. - if (Optional<Metadata *> MappedOp = tryToMapOperand(Op)) { - // Check if the operand changes. - HasChanged |= Op != *MappedOp; - continue; - } - - // A uniqued metadata node. - MDNode &OpN = *cast<MDNode>(Op); - assert(OpN.isUniqued() && - "Only uniqued operands cannot be mapped immediately"); - if (G.Info.insert(std::make_pair(&OpN, Data())).second) - return &OpN; // This is a new one. Return it. - } - return nullptr; -} - -void MDNodeMapper::UniquedGraph::propagateChanges() { - bool AnyChanges; - do { - AnyChanges = false; - for (MDNode *N : POT) { - auto &D = Info[N]; - if (D.HasChanged) - continue; - - if (llvm::none_of(N->operands(), [&](const Metadata *Op) { - auto Where = Info.find(Op); - return Where != Info.end() && Where->second.HasChanged; - })) - continue; - - AnyChanges = D.HasChanged = true; - } - } while (AnyChanges); -} - -void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { - // Construct uniqued nodes, building forward references as necessary. - SmallVector<MDNode *, 16> CyclicNodes; - for (auto *N : G.POT) { - auto &D = G.Info[N]; - if (!D.HasChanged) { - // The node hasn't changed. - M.mapToSelf(N); - continue; - } - - // Remember whether this node had a placeholder. - bool HadPlaceholder(D.Placeholder); - - // Clone the uniqued node and remap the operands. - TempMDNode ClonedN = D.Placeholder ? std::move(D.Placeholder) : N->clone(); - remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) { - if (Optional<Metadata *> MappedOp = getMappedOp(Old)) - return *MappedOp; - (void)D; - assert(G.Info[Old].ID > D.ID && "Expected a forward reference"); - return &G.getFwdReference(*cast<MDNode>(Old)); - }); - - auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN)); - M.mapToMetadata(N, NewN); - - // Nodes that were referenced out of order in the POT are involved in a - // uniquing cycle. - if (HadPlaceholder) - CyclicNodes.push_back(NewN); - } - - // Resolve cycles.
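UniquedGraph::propagateChanges above is a plain fixed-point sweep. A standalone sketch (Node is a stand-in): mark a node changed when any operand in the graph is changed, repeating until a full pass makes no update; because the list is in post-order, acyclic graphs converge in one pass and only uniquing cycles force another:

#include <map>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
};

void propagateChanges(const std::vector<Node *> &POT,
                      std::map<Node *, bool> &Changed) {
  bool AnyChanges;
  do {
    AnyChanges = false;
    for (Node *N : POT) {
      if (Changed[N])
        continue; // already marked
      for (Node *Op : N->Ops) {
        auto It = Changed.find(Op); // only nodes in the graph count
        if (It != Changed.end() && It->second) {
          AnyChanges = Changed[N] = true;
          break;
        }
      }
    }
  } while (AnyChanges);
}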
- for (auto *N : CyclicNodes) - if (!N->isResolved()) - N->resolveCycles(); -} - -Metadata *MDNodeMapper::map(const MDNode &N) { - assert(DistinctWorklist.empty() && "MDNodeMapper::map is not recursive"); - assert(!(M.Flags & RF_NoModuleLevelChanges) && - "MDNodeMapper::map assumes module-level changes"); - - // Require resolved nodes whenever metadata might be remapped. - assert(N.isResolved() && "Unexpected unresolved node"); - - Metadata *MappedN = - N.isUniqued() ? mapTopLevelUniquedNode(N) : mapDistinctNode(N); - while (!DistinctWorklist.empty()) - remapOperands(*DistinctWorklist.pop_back_val(), [this](Metadata *Old) { - if (Optional<Metadata *> MappedOp = tryToMapOperand(Old)) - return *MappedOp; - return mapTopLevelUniquedNode(*cast<MDNode>(Old)); - }); - return MappedN; -} - -Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { - assert(FirstN.isUniqued() && "Expected uniqued node"); - - // Create a post-order traversal of uniqued nodes under FirstN. - UniquedGraph G; - if (!createPOT(G, FirstN)) { - // Return early if no nodes have changed. - for (const MDNode *N : G.POT) - M.mapToSelf(N); - return &const_cast<MDNode &>(FirstN); - } - - // Update graph with all nodes that have changed. - G.propagateChanges(); - - // Map all the nodes in the graph. - mapNodesInPOT(G); - - // Return the original node, remapped. - return *getMappedOp(&FirstN); -} - -namespace { - -struct MapMetadataDisabler { - ValueToValueMapTy &VM; - - MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { - VM.disableMapMetadata(); - } - - ~MapMetadataDisabler() { VM.enableMapMetadata(); } -}; - -} // end anonymous namespace - -Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) { - // If the value already exists in the map, use it. - if (Optional<Metadata *> NewMD = getVM().getMappedMD(MD)) - return *NewMD; - - if (isa<MDString>(MD)) - return const_cast<Metadata *>(MD); - - // This is a module-level metadata. If nothing at the module level is - // changing, use an identity mapping. - if ((Flags & RF_NoModuleLevelChanges)) - return const_cast<Metadata *>(MD); - - if (auto *CMD = dyn_cast<ConstantAsMetadata>(MD)) { - // Disallow recursion into metadata mapping through mapValue. - MapMetadataDisabler MMD(getVM()); - - // Don't memoize ConstantAsMetadata. Instead of lasting until the - // LLVMContext is destroyed, they can be deleted when the GlobalValue they - // reference is destructed. These aren't super common, so the extra - // indirection isn't that expensive. - return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue())); - } - - assert(isa<MDNode>(MD) && "Expected a metadata node"); - - return None; -} - -Metadata *Mapper::mapMetadata(const Metadata *MD) { - assert(MD && "Expected valid metadata"); - assert(!isa<LocalAsMetadata>(MD) && "Unexpected local metadata"); - - if (Optional<Metadata *> NewMD = mapSimpleMetadata(MD)) - return *NewMD; - - return MDNodeMapper(*this).map(*cast<MDNode>(MD)); -} - -void Mapper::flush() { - // Flush out the worklist of global values. 
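MapMetadataDisabler above is a scope guard. A standalone sketch of the same RAII shape (names are illustrative): switch a facility off in the constructor and guarantee it is switched back on along every exit path, including early returns:

// Disable a facility for exactly one scope.
struct MetadataMapGuard {
  bool &Enabled;
  explicit MetadataMapGuard(bool &E) : Enabled(E) { Enabled = false; }
  ~MetadataMapGuard() { Enabled = true; }
};

void mapConstantNoMetadata(bool &MapMetadataEnabled) {
  MetadataMapGuard Guard(MapMetadataEnabled); // disabled from here on
  // ... map a value without recursing into metadata mapping ...
}                                             // re-enabled on scope exit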
- while (!Worklist.empty()) { - WorklistEntry E = Worklist.pop_back_val(); - CurrentMCID = E.MCID; - switch (E.Kind) { - case WorklistEntry::MapGlobalInit: - E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); - remapGlobalObjectMetadata(*E.Data.GVInit.GV); - break; - case WorklistEntry::MapAppendingVar: { - unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; - mapAppendingVariable(*E.Data.AppendingGV.GV, - E.Data.AppendingGV.InitPrefix, - E.AppendingGVIsOldCtorDtor, - makeArrayRef(AppendingInits).slice(PrefixSize)); - AppendingInits.resize(PrefixSize); - break; - } - case WorklistEntry::MapGlobalAliasee: - E.Data.GlobalAliasee.GA->setAliasee( - mapConstant(E.Data.GlobalAliasee.Aliasee)); - break; - case WorklistEntry::RemapFunction: - remapFunction(*E.Data.RemapF); - break; - } - } - CurrentMCID = 0; - - // Finish logic for block addresses now that all global values have been - // handled. - while (!DelayedBBs.empty()) { - DelayedBasicBlock DBB = DelayedBBs.pop_back_val(); - BasicBlock *BB = cast_or_null<BasicBlock>(mapValue(DBB.OldBB)); - DBB.TempBB->replaceAllUsesWith(BB ? BB : DBB.OldBB); - } -} - -void Mapper::remapInstruction(Instruction *I) { - // Remap operands. - for (Use &Op : I->operands()) { - Value *V = mapValue(Op); - // If we aren't ignoring missing entries, assert that something happened. - if (V) - Op = V; - else - assert((Flags & RF_IgnoreMissingLocals) && - "Referenced value not in value map!"); - } - - // Remap phi nodes' incoming blocks. - if (PHINode *PN = dyn_cast<PHINode>(I)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = mapValue(PN->getIncomingBlock(i)); - // If we aren't ignoring missing entries, assert that something happened. - if (V) - PN->setIncomingBlock(i, cast<BasicBlock>(V)); - else - assert((Flags & RF_IgnoreMissingLocals) && - "Referenced block not in value map!"); - } - } - - // Remap attached metadata. - SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - I->getAllMetadata(MDs); - for (const auto &MI : MDs) { - MDNode *Old = MI.second; - MDNode *New = cast_or_null<MDNode>(mapMetadata(Old)); - if (New != Old) - I->setMetadata(MI.first, New); - } - - if (!TypeMapper) - return; - - // If the instruction's type is being remapped, do so now. 
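The attached-metadata rewrite inside remapInstruction above uses the stable Instruction API (getAllMetadata/setMetadata). A sketch of just that step, where the MapMD callback stands in for Mapper::mapMetadata:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
#include <utility>

// Collect every (kind, node) attachment, remap each node, and write back
// only the ones that actually changed.
static void remapAttachedMetadata(llvm::Instruction &I,
                                  llvm::MDNode *(*MapMD)(llvm::MDNode *)) {
  llvm::SmallVector<std::pair<unsigned, llvm::MDNode *>, 4> MDs;
  I.getAllMetadata(MDs);
  for (const auto &P : MDs) {
    llvm::MDNode *New = MapMD(P.second);
    if (New != P.second)
      I.setMetadata(P.first, New); // New == nullptr drops the attachment
  }
}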
- if (auto CS = CallSite(I)) { - SmallVector<Type *, 3> Tys; - FunctionType *FTy = CS.getFunctionType(); - Tys.reserve(FTy->getNumParams()); - for (Type *Ty : FTy->params()) - Tys.push_back(TypeMapper->remapType(Ty)); - CS.mutateFunctionType(FunctionType::get( - TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg())); - - LLVMContext &C = CS->getContext(); - AttributeList Attrs = CS.getAttributes(); - for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { - if (Attrs.hasAttribute(i, Attribute::ByVal)) { - Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType(); - if (!Ty) - continue; - - Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal); - Attrs = Attrs.addAttribute( - C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty))); - } - } - CS.setAttributes(Attrs); - return; - } - if (auto *AI = dyn_cast<AllocaInst>(I)) - AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType())); - if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { - GEP->setSourceElementType( - TypeMapper->remapType(GEP->getSourceElementType())); - GEP->setResultElementType( - TypeMapper->remapType(GEP->getResultElementType())); - } - I->mutateType(TypeMapper->remapType(I->getType())); -} - -void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) { - SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; - GO.getAllMetadata(MDs); - GO.clearMetadata(); - for (const auto &I : MDs) - GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); -} - -void Mapper::remapFunction(Function &F) { - // Remap the operands. - for (Use &Op : F.operands()) - if (Op) - Op = mapValue(Op); - - // Remap the metadata attachments. - remapGlobalObjectMetadata(F); - - // Remap the argument types. - if (TypeMapper) - for (Argument &A : F.args()) - A.mutateType(TypeMapper->remapType(A.getType())); - - // Remap the instructions. - for (BasicBlock &BB : F) - for (Instruction &I : BB) - remapInstruction(&I); -} - -void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, - bool IsOldCtorDtor, - ArrayRef<Constant *> NewMembers) { - SmallVector<Constant *, 16> Elements; - if (InitPrefix) { - unsigned NumElements = - cast<ArrayType>(InitPrefix->getType())->getNumElements(); - for (unsigned I = 0; I != NumElements; ++I) - Elements.push_back(InitPrefix->getAggregateElement(I)); - } - - PointerType *VoidPtrTy; - Type *EltTy; - if (IsOldCtorDtor) { - // FIXME: This upgrade is done during linking to support the C API. See - // also IRLinker::linkAppendingVarProto() in IRMover.cpp. 
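The shape of mapAppendingVariable, whose body continues below, is prefix-plus-remapped-members. A standalone sketch with the per-element remap (and any ctor/dtor upgrade) folded into a callback; names here are illustrative:

#include <functional>
#include <vector>

// Build the new initializer of an appending-linkage global: keep the old
// prefix elements, then append each new member after mapping it.
template <typename Elt>
std::vector<Elt> appendMembers(const std::vector<Elt> &Prefix,
                               const std::vector<Elt> &NewMembers,
                               const std::function<Elt(const Elt &)> &MapElt) {
  std::vector<Elt> Out(Prefix.begin(), Prefix.end());
  for (const Elt &E : NewMembers)
    Out.push_back(MapElt(E)); // remap (and possibly upgrade) each member
  return Out;
}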
- VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo(); - auto &ST = *cast<StructType>(NewMembers.front()->getType()); - Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; - EltTy = StructType::get(GV.getContext(), Tys, false); - } - - for (auto *V : NewMembers) { - Constant *NewV; - if (IsOldCtorDtor) { - auto *S = cast<ConstantStruct>(V); - auto *E1 = cast<Constant>(mapValue(S->getOperand(0))); - auto *E2 = cast<Constant>(mapValue(S->getOperand(1))); - Constant *Null = Constant::getNullValue(VoidPtrTy); - NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null); - } else { - NewV = cast_or_null<Constant>(mapValue(V)); - } - Elements.push_back(NewV); - } - - GV.setInitializer(ConstantArray::get( - cast<ArrayType>(GV.getType()->getElementType()), Elements)); -} - -void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init, - unsigned MCID) { - assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); - assert(MCID < MCs.size() && "Invalid mapping context"); - - WorklistEntry WE; - WE.Kind = WorklistEntry::MapGlobalInit; - WE.MCID = MCID; - WE.Data.GVInit.GV = &GV; - WE.Data.GVInit.Init = &Init; - Worklist.push_back(WE); -} - -void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, - bool IsOldCtorDtor, - ArrayRef<Constant *> NewMembers, - unsigned MCID) { - assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule"); - assert(MCID < MCs.size() && "Invalid mapping context"); - - WorklistEntry WE; - WE.Kind = WorklistEntry::MapAppendingVar; - WE.MCID = MCID; - WE.Data.AppendingGV.GV = &GV; - WE.Data.AppendingGV.InitPrefix = InitPrefix; - WE.AppendingGVIsOldCtorDtor = IsOldCtorDtor; - WE.AppendingGVNumNewMembers = NewMembers.size(); - Worklist.push_back(WE); - AppendingInits.append(NewMembers.begin(), NewMembers.end()); -} - -void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule"); - assert(MCID < MCs.size() && "Invalid mapping context"); - - WorklistEntry WE; - WE.Kind = WorklistEntry::MapGlobalAliasee; - WE.MCID = MCID; - WE.Data.GlobalAliasee.GA = &GA; - WE.Data.GlobalAliasee.Aliasee = &Aliasee; - Worklist.push_back(WE); -} - -void Mapper::scheduleRemapFunction(Function &F, unsigned MCID) { - assert(AlreadyScheduled.insert(&F).second && "Should not reschedule"); - assert(MCID < MCs.size() && "Invalid mapping context"); - - WorklistEntry WE; - WE.Kind = WorklistEntry::RemapFunction; - WE.MCID = MCID; - WE.Data.RemapF = &F; - Worklist.push_back(WE); -} - -void Mapper::addFlags(RemapFlags Flags) { - assert(!hasWorkToDo() && "Expected to have flushed the worklist"); - this->Flags = this->Flags | Flags; -} - -static Mapper *getAsMapper(void *pImpl) { - return reinterpret_cast<Mapper *>(pImpl); -} - -namespace { - -class FlushingMapper { - Mapper &M; - -public: - explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) { - assert(!M.hasWorkToDo() && "Expected to be flushed"); - } - - ~FlushingMapper() { M.flush(); } - - Mapper *operator->() const { return &M; } -}; - -} // end anonymous namespace - -ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) - : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {} - -ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); } - -unsigned -ValueMapper::registerAlternateMappingContext(ValueToValueMapTy &VM, - ValueMaterializer *Materializer) { - 
return getAsMapper(pImpl)->registerAlternateMappingContext(VM, Materializer); -} - -void ValueMapper::addFlags(RemapFlags Flags) { - FlushingMapper(pImpl)->addFlags(Flags); -} - -Value *ValueMapper::mapValue(const Value &V) { - return FlushingMapper(pImpl)->mapValue(&V); -} - -Constant *ValueMapper::mapConstant(const Constant &C) { - return cast_or_null<Constant>(mapValue(C)); -} - -Metadata *ValueMapper::mapMetadata(const Metadata &MD) { - return FlushingMapper(pImpl)->mapMetadata(&MD); -} - -MDNode *ValueMapper::mapMDNode(const MDNode &N) { - return cast_or_null<MDNode>(mapMetadata(N)); -} - -void ValueMapper::remapInstruction(Instruction &I) { - FlushingMapper(pImpl)->remapInstruction(&I); -} - -void ValueMapper::remapFunction(Function &F) { - FlushingMapper(pImpl)->remapFunction(F); -} - -void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV, - Constant &Init, - unsigned MCID) { - getAsMapper(pImpl)->scheduleMapGlobalInitializer(GV, Init, MCID); -} - -void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, - Constant *InitPrefix, - bool IsOldCtorDtor, - ArrayRef<Constant *> NewMembers, - unsigned MCID) { - getAsMapper(pImpl)->scheduleMapAppendingVariable( - GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); -} - -void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID); -} - -void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) { - getAsMapper(pImpl)->scheduleRemapFunction(F, MCID); -}
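For context on how clients drive this interface, a sketch following the same pattern CloneFunctionInto itself uses: fill a ValueToValueMapTy while cloning blocks, then let RemapInstruction rewrite every operand. It assumes NewF already has a compatible signature and that argument mappings were seeded into VMap by the caller:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

// Clone every block of F into NewF, recording old->new mappings in VMap,
// then remap all operands; RF_IgnoreMissingLocals tolerates values that
// have no entry in the map.
static void cloneBody(llvm::Function &F, llvm::Function &NewF,
                      llvm::ValueToValueMapTy &VMap) {
  for (llvm::BasicBlock &BB : F)
    VMap[&BB] = llvm::CloneBasicBlock(&BB, VMap, "", &NewF);
  for (llvm::BasicBlock &BB : NewF)
    for (llvm::Instruction &I : BB)
      llvm::RemapInstruction(&I, VMap,
                             llvm::RF_NoModuleLevelChanges |
                                 llvm::RF_IgnoreMissingLocals);
}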