Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
60 files changed, 2723 insertions, 1808 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 364878dc588d..01912297324a 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -1,9 +1,8 @@
 //===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,7 +62,7 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
   for (size_t i = 0; i < NumVars; i++)
     Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
 
-  std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+  llvm::stable_sort(Vars, CompareVars);
 
   ASanStackFrameLayout Layout;
   Layout.Granularity = Granularity;
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 564537af0c2a..ee0973002c47 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -1,9 +1,8 @@
 //===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -209,7 +208,7 @@ static bool addDiscriminators(Function &F) {
       // Only the lowest 7 bits are used to represent a discriminator to fit
       // it in 1 byte ULEB128 representation.
       unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
-      auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+      auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
       if (!NewDIL) {
         LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
                           << DIL->getFilename() << ":" << DIL->getLine() << ":"
@@ -246,7 +245,7 @@ static bool addDiscriminators(Function &F) {
           std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
       if (!CallLocations.insert(L).second) {
         unsigned Discriminator = ++LDM[L];
-        auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+        auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
         if (!NewDIL) {
           LLVM_DEBUG(dbgs()
                      << "Could not encode discriminator: "
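The ASanStackFrameLayout hunk above swaps the iterator-pair std::stable_sort call for LLVM's range-based llvm::stable_sort. A standalone sketch of the wrapper pattern (simplified; the real helper in llvm/ADT/STLExtras.h forwards through adl_begin/adl_end):

#include <algorithm>
#include <iterator>
#include <utility>

// Simplified stand-in for llvm::stable_sort: take any range plus a
// comparator and forward to the iterator-pair algorithm.
template <typename Range, typename Compare>
void range_stable_sort(Range &&R, Compare C) {
  std::stable_sort(std::begin(R), std::end(R), std::move(C));
}

Calling range_stable_sort(Vars, CompareVars) is then equivalent to the removed std::stable_sort(Vars.begin(), Vars.end(), CompareVars), with no opportunity to pass mismatched iterators.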
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7da768252fc1..5fa371377c85 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,9 +1,8 @@
 //===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
@@ -26,7 +26,6 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -39,6 +38,8 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
@@ -48,30 +49,20 @@
 
 using namespace llvm;
 
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
-  SmallVector<BasicBlock *, 1> BBs = {BB};
-  DeleteDeadBlocks(BBs, DTU);
-}
-
-void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
-                            DomTreeUpdater *DTU) {
-#ifndef NDEBUG
-  // Make sure that all predecessors of each dead block is also dead.
-  SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
-  assert(Dead.size() == BBs.size() && "Duplicating blocks?");
-  for (auto *BB : Dead)
-    for (BasicBlock *Pred : predecessors(BB))
-      assert(Dead.count(Pred) && "All predecessors must be dead!");
-#endif
+#define DEBUG_TYPE "basicblock-utils"
 
-  SmallVector<DominatorTree::UpdateType, 4> Updates;
+void llvm::DetatchDeadBlocks(
+    ArrayRef<BasicBlock *> BBs,
+    SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+    bool KeepOneInputPHIs) {
   for (auto *BB : BBs) {
     // Loop through all of our successors and make sure they know that one
     // of their predecessors is going away.
+    SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
     for (BasicBlock *Succ : successors(BB)) {
-      Succ->removePredecessor(BB);
-      if (DTU)
-        Updates.push_back({DominatorTree::Delete, BB, Succ});
+      Succ->removePredecessor(BB, KeepOneInputPHIs);
+      if (Updates && UniqueSuccessors.insert(Succ).second)
+        Updates->push_back({DominatorTree::Delete, BB, Succ});
     }
 
     // Zap all the instructions in the block.
@@ -92,8 +83,29 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
            "The successor list of BB isn't empty before "
            "applying corresponding DTU updates.");
   }
+}
+
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
+                           bool KeepOneInputPHIs) {
+  DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
+}
+
+void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
+                            bool KeepOneInputPHIs) {
+#ifndef NDEBUG
+  // Make sure that all predecessors of each dead block is also dead.
+  SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+  assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+  for (auto *BB : Dead)
+    for (BasicBlock *Pred : predecessors(BB))
+      assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+  SmallVector<DominatorTree::UpdateType, 4> Updates;
+  DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+
   if (DTU)
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
 
   for (BasicBlock *BB : BBs)
     if (DTU)
@@ -102,6 +114,28 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
       BB->eraseFromParent();
 }
 
+bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+                                      bool KeepOneInputPHIs) {
+  df_iterator_default_set<BasicBlock*> Reachable;
+
+  // Mark all reachable blocks.
+  for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+    (void)BB/* Mark all reachable blocks */;
+
+  // Collect all dead blocks.
+  std::vector<BasicBlock*> DeadBlocks;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (!Reachable.count(&*I)) {
+      BasicBlock *BB = &*I;
+      DeadBlocks.push_back(BB);
+    }
+
+  // Delete the dead blocks.
+  DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
+
+  return !DeadBlocks.empty();
+}
+
 void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
                                    MemoryDependenceResults *MemDep) {
   if (!isa<PHINode>(BB->begin())) return;
@@ -160,6 +194,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
       if (IncValue == &PN)
         return false;
 
+  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+                    << PredBB->getName() << "\n");
+
   // Begin by getting rid of unneeded PHIs.
   SmallVector<AssertingVH<Value>, 4> IncomingValues;
   if (isa<PHINode>(BB->front())) {
@@ -175,11 +212,19 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   std::vector<DominatorTree::UpdateType> Updates;
   if (DTU) {
     Updates.reserve(1 + (2 * succ_size(BB)));
-    Updates.push_back({DominatorTree::Delete, PredBB, BB});
-    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    // Add insert edges first. Experimentally, for the particular case of two
+    // blocks that can be merged, with a single successor and single predecessor
+    // respectively, it is beneficial to have all insert updates first. Deleting
+    // edges first may lead to unreachable blocks, followed by inserting edges
+    // making the blocks reachable again. Such DT updates lead to high compile
+    // times. We add inserts before deletes here to reduce compile time.
+    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+      // This successor of BB may already have PredBB as a predecessor.
+      if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
+        Updates.push_back({DominatorTree::Insert, PredBB, *I});
+    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
       Updates.push_back({DominatorTree::Delete, BB, *I});
-      Updates.push_back({DominatorTree::Insert, PredBB, *I});
-    }
+    Updates.push_back({DominatorTree::Delete, PredBB, BB});
   }
 
   if (MSSAU)
@@ -227,7 +272,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
            isa<UnreachableInst>(BB->getTerminator()) &&
            "The successor list of BB isn't empty before "
            "applying corresponding DTU updates.");
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
     DTU->deleteBB(BB);
   }
 
@@ -534,7 +579,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
-  BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+  // Splitting the predecessors of a loop header creates a preheader block.
+  if (LI && LI->isLoopHeader(BB))
+    // Using the loop start line number prevents debuggers stepping into the
+    // loop body for this instruction.
+    BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+  else
+    BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
 
   // Move the edges from Preds to point to NewBB instead of BB.
   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -543,6 +594,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     // all BlockAddress uses would need to be updated.
     assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
            "Cannot split an edge from an IndirectBrInst");
+    assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
+           "Cannot split an edge from a CallBrInst");
     Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
   }
 
@@ -711,7 +764,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
     UncondBranch->eraseFromParent();
 
   if (DTU)
-    DTU->deleteEdge(Pred, BB);
+    DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
 
   return cast<ReturnInst>(NewRet);
 }
@@ -720,18 +773,23 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
                                              Instruction *SplitBefore,
                                              bool Unreachable,
                                              MDNode *BranchWeights,
-                                             DominatorTree *DT, LoopInfo *LI) {
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             BasicBlock *ThenBlock) {
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
   Instruction *HeadOldTerm = Head->getTerminator();
   LLVMContext &C = Head->getContext();
-  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
   Instruction *CheckTerm;
-  if (Unreachable)
-    CheckTerm = new UnreachableInst(C, ThenBlock);
-  else
-    CheckTerm = BranchInst::Create(Tail, ThenBlock);
-  CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+  bool CreateThenBlock = (ThenBlock == nullptr);
+  if (CreateThenBlock) {
+    ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+    if (Unreachable)
+      CheckTerm = new UnreachableInst(C, ThenBlock);
+    else
+      CheckTerm = BranchInst::Create(Tail, ThenBlock);
+    CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+  } else
+    CheckTerm = ThenBlock->getTerminator();
   BranchInst *HeadNewTerm =
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
   HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
@@ -746,7 +804,10 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
         DT->changeImmediateDominator(Child, NewNode);
 
       // Head dominates ThenBlock.
-      DT->addNewBlock(ThenBlock, Head);
+      if (CreateThenBlock)
+        DT->addNewBlock(ThenBlock, Head);
+      else
+        DT->changeImmediateDominator(ThenBlock, Head);
     }
   }
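The ordering rationale in the MergeBlockIntoPredecessor hunk above is worth calling out: edge insertions are queued before deletions so the incremental dominator-tree updater never sees transiently unreachable blocks. A condensed sketch of the pattern (not from the patch itself; assumes the LLVM-9-era DomTreeUpdater API):

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include <vector>

using namespace llvm;

static void mergeEdgesIntoPred(BasicBlock *BB, BasicBlock *PredBB,
                               DomTreeUpdater &DTU) {
  std::vector<DominatorTree::UpdateType> Updates;
  // Queue inserts before deletes: deleting first can transiently make
  // blocks unreachable, and re-inserting edges afterwards forces the
  // incremental updater to do far more work.
  for (BasicBlock *Succ : successors(BB))
    Updates.push_back({DominatorTree::Insert, PredBB, Succ});
  for (BasicBlock *Succ : successors(BB))
    Updates.push_back({DominatorTree::Delete, BB, Succ});
  Updates.push_back({DominatorTree::Delete, PredBB, BB});
  // applyUpdatesPermissive tolerates duplicate and no-op updates, which
  // is why the patch can drop the older ForceRemoveDuplicates flag.
  DTU.applyUpdatesPermissive(Updates);
}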
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index fafc9aaba5c9..f5e4b53f6d97 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -1,9 +1,8 @@
 //===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -49,10 +49,14 @@ namespace {
     bool runOnFunction(Function &F) override {
       auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
       auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+      auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+      auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+
       auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
       auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
       unsigned N =
-          SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+          SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
       NumBroken += N;
       return N > 0;
     }
@@ -145,6 +149,14 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
   // it in this generic function.
   if (DestBB->isEHPad()) return nullptr;
 
+  // Don't split the non-fallthrough edge from a callbr.
+  if (isa<CallBrInst>(TI) && SuccNum > 0)
+    return nullptr;
+
+  if (Options.IgnoreUnreachableDests &&
+      isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
+    return nullptr;
+
   // Create a new basic block, linking it into the CFG.
   BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
       TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -189,7 +201,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
       if (TI->getSuccessor(i) != DestBB) continue;
 
       // Remove an entry for TIBB from DestBB phi nodes.
-      DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
+      DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
 
       // We found another edge to DestBB, go to NewBB instead.
       TI->setSuccessor(i, NewBB);
@@ -198,16 +210,17 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
 
   // If we have nothing to update, just return.
   auto *DT = Options.DT;
+  auto *PDT = Options.PDT;
   auto *LI = Options.LI;
   auto *MSSAU = Options.MSSAU;
   if (MSSAU)
     MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
         DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
 
-  if (!DT && !LI)
+  if (!DT && !PDT && !LI)
    return NewBB;
 
-  if (DT) {
+  if (DT || PDT) {
     // Update the DominatorTree.
     //       ---> NewBB -----\
     //      /                 V
@@ -223,7 +236,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
     if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
       Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
 
-    DT->applyUpdates(Updates);
+    if (DT)
+      DT->applyUpdates(Updates);
+    if (PDT)
+      PDT->applyUpdates(Updates);
   }
 
   // Update LoopInfo if it is around.
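SplitCriticalEdge now keeps a PostDominatorTree current with the same edge list it hands the DominatorTree, since both trees consume identical {Insert, Delete} CFG updates. A minimal sketch of that dual-update step (hypothetical helper, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

static void applyToBothTrees(ArrayRef<DominatorTree::UpdateType> Updates,
                             DominatorTree *DT, PostDominatorTree *PDT) {
  // The update list describes CFG edges, not tree edges, so it is valid
  // for either analysis; compute it once and feed whichever trees the
  // caller actually has.
  if (DT)
    DT->applyUpdates(Updates);
  if (PDT)
    PDT->applyUpdates(Updates);
}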
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 3466dedd3236..27f110e24f9c 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1,9 +1,8 @@
 //===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 
 using namespace llvm;
 
@@ -121,6 +121,13 @@ static bool setNonLazyBind(Function &F) {
   return true;
 }
 
+static bool setDoesNotFreeMemory(Function &F) {
+  if (F.hasFnAttribute(Attribute::NoFree))
+    return false;
+  F.addFnAttr(Attribute::NoFree);
+  return true;
+}
+
 bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
                                   const TargetLibraryInfo &TLI) {
   Function *F = M->getFunction(Name);
@@ -136,6 +143,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
 
   bool Changed = false;
 
+  if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
+    Changed |= setDoesNotFreeMemory(F);
+
   if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
     Changed |= setNonLazyBind(F);
 
@@ -790,95 +800,76 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
   return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
 }
 
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
-                        const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strlen))
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+                          ArrayRef<Type *> ParamTypes,
+                          ArrayRef<Value *> Operands, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI,
+                          bool IsVaArgs = false) {
+  if (!TLI->has(TheLibFunc))
     return nullptr;
 
   Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrlenName = TLI->getName(LibFunc_strlen);
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Constant *StrLen = M->getOrInsertFunction(StrlenName, DL.getIntPtrType(Context),
-                                            B.getInt8PtrTy());
-  inferLibFuncAttributes(M, StrlenName, *TLI);
-  CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), StrlenName);
-  if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+  StringRef FuncName = TLI->getName(TheLibFunc);
+  FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+  FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
+  inferLibFuncAttributes(M, FuncName, *TLI);
+  CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
-
   return CI;
 }
 
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strchr))
-    return nullptr;
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
+                     B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrChrName = TLI->getName(LibFunc_strchr);
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
-  Constant *StrChr =
-      M->getOrInsertFunction(StrChrName, I8Ptr, I8Ptr, I32Ty);
-  inferLibFuncAttributes(M, StrChrName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, StrChrName);
-  if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
+                     {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
 }
 
 Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strncmp))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrNCmpName = TLI->getName(LibFunc_strncmp);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *StrNCmp = M->getOrInsertFunction(StrNCmpName, B.getInt32Ty(),
-                                          B.getInt8PtrTy(), B.getInt8PtrTy(),
-                                          DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, StrNCmpName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, StrNCmpName);
-
-  if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
+      LibFunc_strncmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
 Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                        const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strcpy))
-    return nullptr;
+                        const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI =
-      B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
-  if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
 }
 
 Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                         const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strncpy))
-    return nullptr;
+                         const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
-                                          Len->getType());
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, Name);
-  if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
 }
 
 Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -892,57 +883,115 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
   AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
                           Attribute::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemCpy = M->getOrInsertFunction(
+  FunctionCallee MemCpy = M->getOrInsertFunction(
       "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
       B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
       DL.getIntPtrType(Context));
   Dst = castToCStr(Dst, B);
   Src = castToCStr(Src, B);
   CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
-  if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
 
 Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memchr))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef MemChrName = TLI->getName(LibFunc_memchr);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemChr = M->getOrInsertFunction(MemChrName, B.getInt8PtrTy(),
-                                         B.getInt8PtrTy(), B.getInt32Ty(),
-                                         DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MemChrName, *TLI);
-  CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, MemChrName);
-
-  if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
      LibFunc_memchr, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr, B), Val, Len}, B, TLI);
 }
 
 Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memcmp))
-    return nullptr;
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_memcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef MemCmpName = TLI->getName(LibFunc_memcmp);
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                      const DataLayout &DL, const TargetLibraryInfo *TLI) {
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemCmp = M->getOrInsertFunction(MemCmpName, B.getInt32Ty(),
-                                         B.getInt8PtrTy(), B.getInt8PtrTy(),
-                                         DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MemCmpName, *TLI);
-  CallInst *CI = B.CreateCall(
-      MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, MemCmpName);
-
-  if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
+  return emitLibCall(
+      LibFunc_bcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
 
-  return CI;
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+                         IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_memccpy, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
+      {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+                          ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
+                     Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+                         ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
+                     /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()},
+                     {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcpy, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcat, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+                           IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_vsnprintf, B.getInt32Ty(),
+      {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
+      {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+                          IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
+                     {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
 }
 
 /// Append a suffix to the function name according to the type of 'Op'.
@@ -966,8 +1015,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
   assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
 
   Module *M = B.GetInsertBlock()->getModule();
-  Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
-                                         Op->getType());
+  FunctionCallee Callee =
+      M->getOrInsertFunction(Name, Op->getType(), Op->getType());
   CallInst *CI = B.CreateCall(Callee, Op, Name);
 
   // The incoming attribute set may have come from a speculatable intrinsic, but
@@ -976,7 +1025,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
   CI->setAttributes(Attrs.removeAttribute(B.getContext(),
                                           AttributeList::FunctionIndex,
                                           Attribute::Speculatable));
-  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1009,11 +1059,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
   appendTypeSuffix(Op1, Name, NameBuffer);
 
   Module *M = B.GetInsertBlock()->getModule();
-  Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
-                                         Op2->getType());
+  FunctionCallee Callee = M->getOrInsertFunction(
      Name, Op1->getType(), Op1->getType(), Op2->getType());
   CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
   CI->setAttributes(Attrs);
-  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1026,7 +1077,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef PutCharName = TLI->getName(LibFunc_putchar);
-  Value *PutChar = M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
+  FunctionCallee PutChar =
+      M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
   inferLibFuncAttributes(M, PutCharName, *TLI);
   CallInst *CI = B.CreateCall(PutChar,
                               B.CreateIntCast(Char,
@@ -1035,7 +1087,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
                                               "chari"),
                               PutCharName);
 
-  if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
@@ -1047,11 +1100,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef PutsName = TLI->getName(LibFunc_puts);
-  Value *PutS =
+  FunctionCallee PutS =
       M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
   inferLibFuncAttributes(M, PutsName, *TLI);
   CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
-  if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
@@ -1063,15 +1117,16 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutcName = TLI->getName(LibFunc_fputc);
-  Constant *F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), B.getInt32Ty(),
-                                       File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
+                                            B.getInt32Ty(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutcName, *TLI);
   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
                          "chari");
   CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1083,14 +1138,15 @@ Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
-  Constant *F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
-                                       B.getInt32Ty(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
+                                            B.getInt32Ty(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
   CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1102,13 +1158,14 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutsName = TLI->getName(LibFunc_fputs);
-  Constant *F = M->getOrInsertFunction(
-      FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
                                            B.getInt8PtrTy(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutsName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1120,13 +1177,14 @@ Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
-  Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
-                                       B.getInt8PtrTy(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
                                             B.getInt8PtrTy(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1139,7 +1197,7 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc_fwrite);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
       FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1149,7 +1207,8 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
       B.CreateCall(F, {castToCStr(Ptr, B), Size,
                        ConstantInt::get(DL.getIntPtrType(Context), 1), File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1162,12 +1221,13 @@ Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
   Module *M = B.GetInsertBlock()->getModule();
   StringRef MallocName = TLI->getName(LibFunc_malloc);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
-                                         DL.getIntPtrType(Context));
+  FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
+                                                 DL.getIntPtrType(Context));
   inferLibFuncAttributes(M, MallocName, *TLI);
   CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
 
-  if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1182,12 +1242,13 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
   StringRef CallocName = TLI.getName(LibFunc_calloc);
   const DataLayout &DL = M->getDataLayout();
   IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
-  Value *Calloc = M->getOrInsertFunction(CallocName, Attrs, B.getInt8PtrTy(),
-                                         PtrType, PtrType);
+  FunctionCallee Calloc = M->getOrInsertFunction(
      CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
   inferLibFuncAttributes(M, CallocName, TLI);
   CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
 
-  if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+  if (const auto *F =
+          dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1202,7 +1263,7 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
       FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
       DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1210,7 +1271,8 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
     inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1222,13 +1284,14 @@ Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
-  Constant *F =
-      M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
                                             File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1240,14 +1303,15 @@ Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
-  Constant *F =
+  FunctionCallee F =
      M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
                              B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
   inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
   CallInst *CI =
       B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1261,7 +1325,7 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
      FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
       DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1269,7 +1333,8 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
     inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index e7828af648a9..df299f673f65 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,9 +1,8 @@
 //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
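A recurring pattern across the BuildLibCalls.cpp hunks above is the migration from Constant * to FunctionCallee: getOrInsertFunction may hand back the declaration wrapped in a bitcast, so callers now unwrap it via getCallee()->stripPointerCasts() before querying the underlying Function. A condensed sketch of the new shape (hypothetical emitter, modeled on the ones above):

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitPutsFor(Value *Str, IRBuilder<> &B, Module *M) {
  // FunctionCallee carries {FunctionType, Value} so a type-mismatched
  // pre-existing declaration still yields a correctly typed call.
  FunctionCallee PutS =
      M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy());
  CallInst *CI = B.CreateCall(PutS, Str, "puts");
  // Recover the Function (if any) behind a possible bitcast to copy
  // its calling convention onto the call site.
  if (const Function *F =
          dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());
  return CI;
}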
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index e58ddcf34667..f04d76e70c0d 100644
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -1,9 +1,8 @@
 //===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -367,8 +366,9 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
                                CastInst **RetBitCast) {
   assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
 
-  // Set the called function of the call site to be the given callee.
-  CS.setCalledFunction(Callee);
+  // Set the called function of the call site to be the given callee (but don't
+  // change the type).
+  cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
 
   // Since the call site will no longer be direct, we must clear metadata that
   // is only appropriate for indirect calls. This includes !prof and !callees
@@ -412,6 +412,15 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
       // Remove any incompatible attributes for the argument.
       AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
       ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+
+      // If byval is used, this must be a pointer type, and the byval type must
+      // match the element type. Update it if present.
+      if (ArgAttrs.getByValType()) {
+        Type *NewTy = Callee->getParamByValType(ArgNo);
+        ArgAttrs.addByValAttr(
+            NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
+      }
+
       NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
       AttributeChanged = true;
     } else
diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index cf41fd2e14c0..455fcbb1cf98 100644
--- a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -1,9 +1,8 @@
 //===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
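The promoteCall change above is subtle: CallSite::setCalledFunction would also rewrite the call's function type, whereas promotion must keep the original type and insert explicit casts for mismatched arguments separately. A minimal sketch of the operand-only retargeting (simplified; the attribute and cast handling from the patch is omitted):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static void retargetIndirectCall(CallBase &CB, Function *Callee) {
  // Only the callee operand changes; the call keeps its existing
  // FunctionType, so argument/return mismatches remain visible and can
  // be patched up with explicit bitcasts afterwards.
  CB.setCalledOperand(Callee);
}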
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8f8c601f5f13..1026c9d37038 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -1,9 +1,8 @@
 //===- CloneFunction.cpp - Clone a function into another function ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,13 +15,13 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -740,12 +739,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
                                    const Twine &NameSuffix, LoopInfo *LI,
                                    DominatorTree *DT,
                                    SmallVectorImpl<BasicBlock *> &Blocks) {
-  assert(OrigLoop->getSubLoops().empty() &&
-         "Loop to be cloned cannot have inner loop");
   Function *F = OrigLoop->getHeader()->getParent();
   Loop *ParentLoop = OrigLoop->getParentLoop();
+  DenseMap<Loop *, Loop *> LMap;
 
   Loop *NewLoop = LI->AllocateLoop();
+  LMap[OrigLoop] = NewLoop;
   if (ParentLoop)
     ParentLoop->addChildLoop(NewLoop);
   else
@@ -765,14 +764,36 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
   // Update DominatorTree.
   DT->addNewBlock(NewPH, LoopDomBB);
 
+  for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
+    Loop *&NewLoop = LMap[CurLoop];
+    if (!NewLoop) {
+      NewLoop = LI->AllocateLoop();
+
+      // Establish the parent/child relationship.
+      Loop *OrigParent = CurLoop->getParentLoop();
+      assert(OrigParent && "Could not find the original parent loop");
+      Loop *NewParentLoop = LMap[OrigParent];
+      assert(NewParentLoop && "Could not find the new parent loop");
+
+      NewParentLoop->addChildLoop(NewLoop);
+    }
+  }
+
   for (BasicBlock *BB : OrigLoop->getBlocks()) {
+    Loop *CurLoop = LI->getLoopFor(BB);
+    Loop *&NewLoop = LMap[CurLoop];
+    assert(NewLoop && "Expecting new loop to be allocated");
+
     BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
     VMap[BB] = NewBB;
 
     // Update LoopInfo.
     NewLoop->addBasicBlockToLoop(NewBB, *LI);
+    if (BB == CurLoop->getHeader())
+      NewLoop->moveToHeader(NewBB);
 
-    // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+    // Add DominatorTree node. After seeing all blocks, update to correct
+    // IDom.
     DT->addNewBlock(NewBB, NewPH);
 
     Blocks.push_back(NewBB);
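cloneLoopWithPreheader above drops its no-subloops restriction by keeping an original-to-clone loop map and walking the nest in preorder, so every parent is mapped before its children. A condensed sketch of that scheme (hypothetical helper distilled from the hunk):

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/LoopInfo.h"

using namespace llvm;

static void cloneLoopNest(Loop *OrigLoop, Loop *NewRoot, LoopInfo *LI,
                          DenseMap<Loop *, Loop *> &LMap) {
  // Seed the map with the root so the preorder walk below skips it.
  LMap[OrigLoop] = NewRoot;
  for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
    Loop *&NewLoop = LMap[CurLoop];
    if (NewLoop)
      continue; // Already allocated (the root itself).
    NewLoop = LI->AllocateLoop();
    // Preorder guarantees the parent was visited (and mapped) first.
    LMap[CurLoop->getParentLoop()]->addChildLoop(NewLoop);
  }
}

With the map in hand, each cloned basic block can then be attached to LMap[LI->getLoopFor(BB)] rather than to a single flat loop, which is exactly what the rewritten block-cloning loop does.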
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 25d4ae583ecc..fa6d3f8ae873 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1,9 +1,8 @@ //===- CodeExtractor.cpp - Pull code region into a new function -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -21,6 +20,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -44,6 +44,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -67,6 +68,7 @@ #include <vector> using namespace llvm; +using namespace llvm::PatternMatch; using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "code-extractor" @@ -207,6 +209,9 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, llvm_unreachable("Repeated basic blocks in extraction input"); } + LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName() + << '\n'); + for (auto *BB : Result) { if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) return {}; @@ -224,9 +229,11 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, // the subgraph which is being extracted. 
for (auto *PBB : predecessors(BB)) if (!Result.count(PBB)) { - LLVM_DEBUG( - dbgs() << "No blocks in this region may have entries from " - "outside the region except for the first block!\n"); + LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " + "outside the region except for the first block!\n" + << "Problematic source BB: " << BB->getName() << "\n" + << "Problematic destination BB: " << PBB->getName() + << "\n"); return {}; } } @@ -236,18 +243,20 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, bool AllowVarArgs, - bool AllowAlloca, std::string Suffix) + BranchProbabilityInfo *BPI, AssumptionCache *AC, + bool AllowVarArgs, bool AllowAlloca, + std::string Suffix) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AllowVarArgs(AllowVarArgs), + BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, std::string Suffix) + BranchProbabilityInfo *BPI, AssumptionCache *AC, + std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AllowVarArgs(false), + BPI(BPI), AC(AC), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, /* AllowAlloca */ false)), @@ -325,7 +334,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( if (dyn_cast<Constant>(MemAddr)) break; Value *Base = MemAddr->stripInBoundsConstantOffsets(); - if (!dyn_cast<AllocaInst>(Base) || Base == AI) + if (!isa<AllocaInst>(Base) || Base == AI) return false; break; } @@ -401,11 +410,74 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { return CommonExitBlock; } +// Find the pair of life time markers for address 'Addr' that are either +// defined inside the outline region or can legally be shrinkwrapped into the +// outline region. If there are not other untracked uses of the address, return +// the pair of markers if found; otherwise return a pair of nullptr. +CodeExtractor::LifetimeMarkerInfo +CodeExtractor::getLifetimeMarkers(Instruction *Addr, + BasicBlock *ExitBlock) const { + LifetimeMarkerInfo Info; + + for (User *U : Addr->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + // Do not handle the case where Addr has multiple start markers. + if (Info.LifeStart) + return {}; + Info.LifeStart = IntrInst; + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (Info.LifeEnd) + return {}; + Info.LifeEnd = IntrInst; + } + continue; + } + // Find untracked uses of the address, bail. + if (!definedInRegion(Blocks, U)) + return {}; + } + + if (!Info.LifeStart || !Info.LifeEnd) + return {}; + + Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart); + Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd); + // Do legality check. + if ((Info.SinkLifeStart || Info.HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(Addr)) + return {}; + + // Check to see if we have a place to do hoisting, if not, bail. 
+ if (Info.HoistLifeEnd && !ExitBlock) + return {}; + + return Info; +} + void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); + auto moveOrIgnoreLifetimeMarkers = + [&](const LifetimeMarkerInfo &LMI) -> bool { + if (!LMI.LifeStart) + return false; + if (LMI.SinkLifeStart) { + LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart + << "\n"); + SinkCands.insert(LMI.LifeStart); + } + if (LMI.HoistLifeEnd) { + LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n"); + HoistCands.insert(LMI.LifeEnd); + } + return true; + }; + for (BasicBlock &BB : *Func) { if (Blocks.count(&BB)) continue; @@ -414,95 +486,52 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, if (!AI) continue; - // Find the pair of life time markers for address 'Addr' that are either - // defined inside the outline region or can legally be shrinkwrapped into - // the outline region. If there are not other untracked uses of the - // address, return the pair of markers if found; otherwise return a pair - // of nullptr. - auto GetLifeTimeMarkers = - [&](Instruction *Addr, bool &SinkLifeStart, - bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { - Instruction *LifeStart = nullptr, *LifeEnd = nullptr; - - for (User *U : Addr->users()) { - IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); - if (IntrInst) { - if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { - // Do not handle the case where AI has multiple start markers. - if (LifeStart) - return std::make_pair<Instruction *>(nullptr, nullptr); - LifeStart = IntrInst; - } - if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { - if (LifeEnd) - return std::make_pair<Instruction *>(nullptr, nullptr); - LifeEnd = IntrInst; - } - continue; - } - // Find untracked uses of the address, bail. - if (!definedInRegion(Blocks, U)) - return std::make_pair<Instruction *>(nullptr, nullptr); - } - - if (!LifeStart || !LifeEnd) - return std::make_pair<Instruction *>(nullptr, nullptr); - - SinkLifeStart = !definedInRegion(Blocks, LifeStart); - HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); - // Do legality Check. - if ((SinkLifeStart || HoistLifeEnd) && - !isLegalToShrinkwrapLifetimeMarkers(Addr)) - return std::make_pair<Instruction *>(nullptr, nullptr); - - // Check to see if we have a place to do hoisting, if not, bail. - if (HoistLifeEnd && !ExitBlock) - return std::make_pair<Instruction *>(nullptr, nullptr); - - return std::make_pair(LifeStart, LifeEnd); - }; - - bool SinkLifeStart = false, HoistLifeEnd = false; - auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); - - if (Markers.first) { - if (SinkLifeStart) - SinkCands.insert(Markers.first); + LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock); + bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); + if (Moved) { + LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); SinkCands.insert(AI); - if (HoistLifeEnd) - HoistCands.insert(Markers.second); continue; } - // Follow the bitcast. - Instruction *MarkerAddr = nullptr; + // Follow any bitcasts. 
+ SmallVector<Instruction *, 2> Bitcasts; + SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo; for (User *U : AI->users()) { if (U->stripInBoundsConstantOffsets() == AI) { - SinkLifeStart = false; - HoistLifeEnd = false; Instruction *Bitcast = cast<Instruction>(U); - Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); - if (Markers.first) { - MarkerAddr = Bitcast; + LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock); + if (LMI.LifeStart) { + Bitcasts.push_back(Bitcast); + BitcastLifetimeInfo.push_back(LMI); continue; } } // Found unknown use of AI. if (!definedInRegion(Blocks, U)) { - MarkerAddr = nullptr; + Bitcasts.clear(); break; } } - if (MarkerAddr) { - if (SinkLifeStart) - SinkCands.insert(Markers.first); - if (!definedInRegion(Blocks, MarkerAddr)) - SinkCands.insert(MarkerAddr); - SinkCands.insert(AI); - if (HoistLifeEnd) - HoistCands.insert(Markers.second); + // Either no bitcasts reference the alloca or there are unknown uses. + if (Bitcasts.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); + } } } } @@ -780,6 +809,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoBuiltin: case Attribute::NoCapture: case Attribute::NoReturn: + case Attribute::NoSync: case Attribute::None: case Attribute::NonNull: case Attribute::ReadNone: @@ -792,8 +822,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StructRet: case Attribute::SwiftError: case Attribute::SwiftSelf: + case Attribute::WillReturn: case Attribute::WriteOnly: case Attribute::ZExt: + case Attribute::ImmArg: case Attribute::EndAttrKinds: continue; // Those attributes should be safe to propagate to the extracted function. @@ -803,6 +835,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::InlineHint: case Attribute::MinSize: case Attribute::NoDuplicate: + case Attribute::NoFree: case Attribute::NoImplicitFloat: case Attribute::NoInline: case Attribute::NonLazyBind: @@ -817,6 +850,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::SanitizeMemory: case Attribute::SanitizeThread: case Attribute::SanitizeHWAddress: + case Attribute::SanitizeMemTag: case Attribute::SpeculativeLoadHardening: case Attribute::StackProtect: case Attribute::StackProtectReq: @@ -845,7 +879,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Instruction *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = &*AI++; @@ -880,6 +915,88 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, return newFunction; } +/// Erase lifetime.start markers which reference inputs to the extraction +/// region, and insert the referenced memory into \p LifetimesStart. 
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+                                         const SetVector<Value *> &SunkAllocas,
+                                         SetVector<Value *> &LifetimesStart) {
+  for (BasicBlock *BB : Blocks) {
+    for (auto It = BB->begin(), End = BB->end(); It != End;) {
+      auto *II = dyn_cast<IntrinsicInst>(&*It);
+      ++It;
+      if (!II || !II->isLifetimeStartOrEnd())
+        continue;
+
+      // Get the memory operand of the lifetime marker. If the underlying
+      // object is a sunk alloca, or is otherwise defined in the extraction
+      // region, the lifetime marker must not be erased.
+      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+      if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+        continue;
+
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+        LifetimesStart.insert(Mem);
+      II->eraseFromParent();
+    }
+  }
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+    Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
+    CallInst *TheCall) {
+  LLVMContext &Ctx = M->getContext();
+  auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+  Instruction *Term = TheCall->getParent()->getTerminator();
+
+  // The memory argument to a lifetime marker must be an i8*. Cache any
+  // bitcasts needed to satisfy this requirement so they may be reused.
+  DenseMap<Value *, Value *> Bitcasts;
+
+  // Emit lifetime markers for the pointers given in \p Objects. Insert the
+  // markers before the call if \p InsertBefore, and after the call otherwise.
+  auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+                           bool InsertBefore) {
+    for (Value *Mem : Objects) {
+      assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
+                                            TheCall->getFunction()) &&
+             "Input memory not defined in original function");
+      Value *&MemAsI8Ptr = Bitcasts[Mem];
+      if (!MemAsI8Ptr) {
+        if (Mem->getType() == Int8PtrTy)
+          MemAsI8Ptr = Mem;
+        else
+          MemAsI8Ptr =
+              CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+      }
+
+      auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+      if (InsertBefore)
+        Marker->insertBefore(TheCall);
+      else
+        Marker->insertBefore(Term);
+    }
+  };
+
+  if (!LifetimesStart.empty()) {
+    auto StartFn = llvm::Intrinsic::getDeclaration(
+        M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+    insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+  }
+
+  if (!LifetimesEnd.empty()) {
+    auto EndFn = llvm::Intrinsic::getDeclaration(
+        M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+    insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+  }
+}
+
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
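// A minimal standalone sketch (not part of the patch) of the marker shape the
// helper above emits: lifetime.start/end with a size of -1 ("unknown size")
// bracketing the call. The names M (Module *), AI (an alloca in the caller)
// and TheCall (the call to the outlined function) are assumed for
// illustration only.
LLVMContext &Ctx = M->getContext();
Type *Int8PtrTy = Type::getInt8PtrTy(Ctx);
Value *NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
// Lifetime intrinsics take an i8* operand, so cast the alloca first.
Value *Mem = CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", TheCall);
Function *StartFn =
    Intrinsic::getDeclaration(M, Intrinsic::lifetime_start, Int8PtrTy);
Function *EndFn =
    Intrinsic::getDeclaration(M, Intrinsic::lifetime_end, Int8PtrTy);
// Start the object's lifetime immediately before the outlined call...
CallInst::Create(StartFn, {NegativeOne, Mem}, "", TheCall);
// ...and end it immediately after, mirroring the markers erased above.
CallInst *EndMarker = CallInst::Create(EndFn, {NegativeOne, Mem});
EndMarker->insertAfter(TheCall);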
@@ -897,11 +1014,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, CallInst *call = nullptr; // Add inputs as params, or to be filled into the struct - for (Value *input : inputs) + unsigned ArgNo = 0; + SmallVector<unsigned, 1> SwiftErrorArgs; + for (Value *input : inputs) { if (AggregateArgs) StructValues.push_back(input); - else + else { params.push_back(input); + if (input->isSwiftError()) + SwiftErrorArgs.push_back(ArgNo); + } + ++ArgNo; + } // Create allocas for the outputs for (Value *output : outputs) { @@ -957,13 +1081,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, } codeReplacer->getInstList().push_back(call); + // Set swifterror parameter attributes. + for (unsigned SwiftErrArgNo : SwiftErrorArgs) { + call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + } + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); unsigned FirstOut = inputs.size(); if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); // Reload the outputs passed in by reference. - Function::arg_iterator OAI = OutputArgBegin; for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *Output = nullptr; if (AggregateArgs) { @@ -977,7 +1106,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, } else { Output = ReloadOutputs[i]; } - LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload"); Reloads.push_back(load); codeReplacer->getInstList().push_back(load); std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); @@ -986,40 +1116,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, if (!Blocks.count(inst->getParent())) inst->replaceUsesOfWith(outputs[i], load); } - - // Store to argument right after the definition of output value. - auto *OutI = dyn_cast<Instruction>(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast<InvokeInst>(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast<PHINode>(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt); - new StoreInst(outputs[i], GEP, &*InsertPt); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, &*InsertPt); - ++OAI; - } } // Now we can emit a switch statement using the call as a value. @@ -1075,6 +1171,50 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, } } + // Store the arguments right after the definition of output value. 
+  // This must be done after creating exit stubs to ensure that the store of
+  // an invoke's result is placed within the outlined function.
+  Function::arg_iterator OAI = OutputArgBegin;
+  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+    auto *OutI = dyn_cast<Instruction>(outputs[i]);
+    if (!OutI)
+      continue;
+
+    // Find proper insertion point.
+    BasicBlock::iterator InsertPt;
+    // In case OutI is an invoke, we insert the store at the beginning in the
+    // 'normal destination' BB. Otherwise we insert the store right after OutI.
+    if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+    else if (auto *Phi = dyn_cast<PHINode>(OutI))
+      InsertPt = Phi->getParent()->getFirstInsertionPt();
+    else
+      InsertPt = std::next(OutI->getIterator());
+
+    Instruction *InsertBefore = &*InsertPt;
+    assert((InsertBefore->getFunction() == newFunction ||
+            Blocks.count(InsertBefore->getParent())) &&
+           "InsertPt should be in new function");
+    assert(OAI != newFunction->arg_end() &&
+           "Number of output arguments should match "
+           "the amount of defined values");
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+          InsertBefore);
+      new StoreInst(outputs[i], GEP, InsertBefore);
+      // Since there should be only one struct argument aggregating
+      // all the output values, we shouldn't increment OAI, which always
+      // points to the struct argument, in this case.
+    } else {
+      new StoreInst(outputs[i], &*OAI, InsertBefore);
+      ++OAI;
+    }
+  }
+
   // Now that we've done the deed, simplify the switch instruction.
   Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
   switch (NumExitBlocks) {
@@ -1119,6 +1259,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     break;
   }
 
+  // Insert lifetime markers around the reloads of any output values. The
+  // allocas that output values are stored in are only in use in the codeRepl
+  // block.
+  insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
+
   return call;
 }
 
@@ -1133,6 +1277,13 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
 
     // Insert this basic block into the new function
     newBlocks.push_back(Block);
+
+    // Remove @llvm.assume calls that were moved to the new function from the
+    // old function's assumption cache.
+    if (AC)
+      for (auto &I : *Block)
+        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+          AC->unregisterAssumption(cast<CallInst>(&I));
   }
 }
 
@@ -1181,71 +1332,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
       MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
 }
 
-/// Scan the extraction region for lifetime markers which reference inputs.
-/// Erase these markers. Return the inputs which were referenced.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static SetVector<Value *> -eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks, - const SetVector<Value *> &SunkAllocas) { - SetVector<Value *> InputObjectsWithLifetime; - for (BasicBlock *BB : Blocks) { - for (auto It = BB->begin(), End = BB->end(); It != End;) { - auto *II = dyn_cast<IntrinsicInst>(&*It); - ++It; - if (!II || !II->isLifetimeStartOrEnd()) - continue; - - // Get the memory operand of the lifetime marker. If the underlying - // object is a sunk alloca, or is otherwise defined in the extraction - // region, the lifetime marker must not be erased. - Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); - if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) - continue; - - InputObjectsWithLifetime.insert(Mem); - II->eraseFromParent(); - } - } - return InputObjectsWithLifetime; -} - -/// Insert lifetime start/end markers surrounding the call to the new function -/// for objects defined in the caller. -static void insertLifetimeMarkersSurroundingCall( - Module *M, const SetVector<Value *> &InputObjectsWithLifetime, - CallInst *TheCall) { - if (InputObjectsWithLifetime.empty()) - return; - - LLVMContext &Ctx = M->getContext(); - auto Int8PtrTy = Type::getInt8PtrTy(Ctx); - auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); - auto LifetimeStartFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_start, Int8PtrTy); - auto LifetimeEndFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_end, Int8PtrTy); - for (Value *Mem : InputObjectsWithLifetime) { - assert((!isa<Instruction>(Mem) || - cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) && - "Input memory not defined in original function"); - Value *MemAsI8Ptr = nullptr; - if (Mem->getType() == Int8PtrTy) - MemAsI8Ptr = Mem; - else - MemAsI8Ptr = - CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); - - auto StartMarker = - CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr}); - StartMarker->insertBefore(TheCall); - auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr}); - EndMarker->insertAfter(TheCall); - } -} - Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; @@ -1348,10 +1434,24 @@ Function *CodeExtractor::extractCodeRegion() { // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); - // Now sink all instructions which only have non-phi uses inside the region - for (auto *II : SinkingCands) - cast<Instruction>(II)->moveBefore(*newFuncRoot, - newFuncRoot->getFirstInsertionPt()); + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + AllocaInst *FirstSunkAlloca = nullptr; + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast<AllocaInst>(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + if (!FirstSunkAlloca) + FirstSunkAlloca = AI; + } + } + assert((SinkingCands.empty() || FirstSunkAlloca) && + "Did not expect a sink candidate without any allocas"); + for (auto *II : SinkingCands) { + if (!isa<AllocaInst>(II)) { + cast<Instruction>(II)->moveAfter(FirstSunkAlloca); + } + } if (!HoistingCands.empty()) { auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); @@ -1361,11 +1461,11 @@ Function *CodeExtractor::extractCodeRegion() { } // Collect objects which are inputs to the extraction region and also - // referenced by lifetime start/end markers within it. 
The effects of these + // referenced by lifetime start markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack // coloring pass from merging slots which store input objects. - ValueSet InputObjectsWithLifetime = - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands); + ValueSet LifetimesStart; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = @@ -1388,8 +1488,8 @@ Function *CodeExtractor::extractCodeRegion() { // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), - InputObjectsWithLifetime, TheCall); + insertLifetimeMarkersSurroundingCall( + oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp index 4e7da7d0449f..069a86f6ab33 100644 --- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -1,9 +1,8 @@ //===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 975b363859a9..5f53d794fe8a 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -1,9 +1,8 @@ //===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -73,7 +72,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Value *&V = Loads[PN->getIncomingBlock(i)]; if (!V) { // Insert the load into the predecessor block - V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", + VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); } PN->setIncomingValue(i, V); @@ -81,7 +81,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, } else { // If this is a normal instruction, just insert a load. 
- Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U); + Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload", + VolatileLoads, U); U->replaceUsesOfWith(&I, V); } } @@ -142,7 +143,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. - Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt); + Value *V = + new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt); P->replaceAllUsesWith(V); // Delete PHI. diff --git a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 569ea58a3047..4aa40eeadda4 100644 --- a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -1,9 +1,8 @@ //===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -31,7 +30,7 @@ static void insertCall(Function &CurFn, StringRef Func, Func == "__mcount" || Func == "_mcount" || Func == "__cyg_profile_func_enter_bare") { - Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); + FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); CallInst *Call = CallInst::Create(Fn, "", InsertionPt); Call->setDebugLoc(DL); return; @@ -40,7 +39,7 @@ static void insertCall(Function &CurFn, StringRef Func, if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; - Constant *Fn = M.getOrInsertFunction( + FunctionCallee Fn = M.getOrInsertFunction( Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); Instruction *RetAddr = CallInst::Create( diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index 762a374c135c..914babeb6829 100644 --- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -1,9 +1,8 @@ //===- EscapeEnumerator.cpp -----------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,7 +18,7 @@ #include "llvm/IR/Module.h" using namespace llvm; -static Constant *getDefaultPersonalityFn(Module *M) { +static FunctionCallee getDefaultPersonalityFn(Module *M) { LLVMContext &C = M->getContext(); Triple T(M->getTargetTriple()); EHPersonality Pers = getDefaultEHPersonality(T); @@ -69,8 +68,8 @@ IRBuilder<> *EscapeEnumerator::Next() { BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); if (!F.hasPersonalityFn()) { - Constant *PersFn = getDefaultPersonalityFn(F.getParent()); - F.setPersonalityFn(PersFn); + FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent()); + F.setPersonalityFn(cast<Constant>(PersFn.getCallee())); } if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp index e875cd686b00..0e203f4e075d 100644 --- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -1,9 +1,8 @@ //===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -175,6 +174,34 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } +/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's +/// type and walk down through the initial elements to obtain additional +/// pointers to try. Returns the first non-null return value from Func, or +/// nullptr if the type can't be introspected further. +static Constant * +evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, + const TargetLibraryInfo *TLI, + std::function<Constant *(Constant *)> Func) { + Constant *Val; + while (!(Val = Func(Ptr))) { + // If Ty is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + Type *Ty = cast<PointerType>(Ptr->getType())->getElementType(); + if (!isa<StructType>(Ty)) + break; + + IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant *const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList); + if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) + Ptr = FoldedPtr; + } + return Val; +} + static Constant *getInitializer(Constant *C) { auto *GV = dyn_cast<GlobalVariable>(C); return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; @@ -185,8 +212,14 @@ static Constant *getInitializer(Constant *C) { Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. 
- DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); - if (I != MutatedMemory.end()) return I->second; + auto findMemLoc = [this](Constant *Ptr) { + DenseMap<Constant *, Constant *>::const_iterator I = + MutatedMemory.find(Ptr); + return I != MutatedMemory.end() ? I->second : nullptr; + }; + + if (Constant *Val = findMemLoc(P)) + return Val; // Access it. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { @@ -204,13 +237,17 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { break; // Handle a constantexpr bitcast. case Instruction::BitCast: - Constant *Val = getVal(CE->getOperand(0)); - auto MM = MutatedMemory.find(Val); - auto *I = (MM != MutatedMemory.end()) ? MM->second - : getInitializer(CE->getOperand(0)); - if (I) + // We're evaluating a load through a pointer that was bitcast to a + // different type. See if the "from" pointer has recently been stored. + // If it hasn't, we may still be able to find a stored pointer by + // introspecting the type. + Constant *Val = + evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc); + if (!Val) + Val = getInitializer(CE->getOperand(0)); + if (Val) return ConstantFoldLoadThroughBitcast( - I, P->getType()->getPointerElementType(), DL); + Val, P->getType()->getPointerElementType(), DL); break; } } @@ -330,37 +367,26 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the - // stored value. - Ptr = CE->getOperand(0); - - Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); - - // In order to push the bitcast onto the stored value, a bitcast - // from NewTy to Val's type must be legal. If it's not, we can try - // introspecting NewTy to find a legal conversion. - Constant *NewVal; - while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) { - // If NewTy is a struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. - if (StructType *STy = dyn_cast<StructType>(NewTy)) { - NewTy = STy->getTypeAtIndex(0U); - - IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant * const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); - if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) - Ptr = FoldedPtr; - - // If we can't improve the situation by introspecting NewTy, - // we have to give up. - } else { - LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; + // stored value. In order to push the bitcast onto the stored value, + // a bitcast from the pointer's element type to Val's type must be + // legal. If it's not, we can try introspecting the type to find a + // legal conversion. 
+ + auto castValTy = [&](Constant *P) -> Constant * { + Type *Ty = cast<PointerType>(P->getType())->getElementType(); + if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) { + Ptr = P; + return FV; } + return nullptr; + }; + + Constant *NewVal = + evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy); + if (!NewVal) { + LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); + return false; } Val = NewVal; @@ -541,7 +567,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { + if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), + Callee, Formals, TLI)) { InstResult = castCallResultIfNeeded(CS.getCalledValue(), C); if (!InstResult) return false; diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index d9778f4a1fb7..0c52e6f3703b 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -1,9 +1,8 @@ //===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp index a717d9b72819..a9b28754c8e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -1,9 +1,8 @@ //===- FunctionComparator.h - Function Comparator -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -114,6 +113,19 @@ int FunctionComparator::cmpAttrs(const AttributeList L, for (; LI != LE && RI != RE; ++LI, ++RI) { Attribute LA = *LI; Attribute RA = *RI; + if (LA.isTypeAttribute() && RA.isTypeAttribute()) { + if (LA.getKindAsEnum() != RA.getKindAsEnum()) + return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum()); + + Type *TyL = LA.getValueAsType(); + Type *TyR = RA.getValueAsType(); + if (TyL && TyR) + return cmpTypes(TyL, TyR); + + // Two pointers, at least one null, so the comparison result is + // independent of the value of a real pointer. 
+ return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); + } if (LA < RA) return -1; if (RA < LA) @@ -557,31 +569,20 @@ int FunctionComparator::cmpOperations(const Instruction *L, } if (const CmpInst *CI = dyn_cast<CmpInst>(L)) return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); - if (const CallInst *CI = dyn_cast<CallInst>(L)) { - if (int Res = cmpNumbers(CI->getCallingConv(), - cast<CallInst>(R)->getCallingConv())) + if (auto CSL = CallSite(const_cast<Instruction *>(L))) { + auto CSR = CallSite(const_cast<Instruction *>(R)); + if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv())) return Res; - if (int Res = - cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes())) return Res; - if (int Res = cmpOperandBundlesSchema(CI, R)) - return Res; - return cmpRangeMetadata( - CI->getMetadata(LLVMContext::MD_range), - cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); - } - if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { - if (int Res = cmpNumbers(II->getCallingConv(), - cast<InvokeInst>(R)->getCallingConv())) + if (int Res = cmpOperandBundlesSchema(L, R)) return Res; - if (int Res = - cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) - return Res; - if (int Res = cmpOperandBundlesSchema(II, R)) - return Res; - return cmpRangeMetadata( - II->getMetadata(LLVMContext::MD_range), - cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + if (const CallInst *CI = dyn_cast<CallInst>(L)) + if (int Res = cmpNumbers(CI->getTailCallKind(), + cast<CallInst>(R)->getTailCallKind())) + return Res; + return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range), + R->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { ArrayRef<unsigned> LIndices = IVI->getIndices(); diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 81d63ee80394..c9cc0990f237 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -1,9 +1,8 @@ //===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -130,7 +129,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, // definitions upon import, so that they are available for inlining // and/or optimization, but are turned into declarations later // during the EliminateAvailableExternally pass. - if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) return GlobalValue::AvailableExternallyLinkage; // An imported external declaration stays external. return SGV->getLinkage(); @@ -159,7 +158,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, // equivalent, so the issue described above for weak_any does not exist, // and the definition can be imported. It can be treated similarly // to an imported externally visible global value. 
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) return GlobalValue::AvailableExternallyLinkage; else return GlobalValue::ExternalLinkage; @@ -177,7 +176,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, // If we are promoting the local to global scope, it is handled // similarly to a normal externally visible global. if (DoPromote) { - if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV)) + if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV)) return GlobalValue::AvailableExternallyLinkage; else return GlobalValue::ExternalLinkage; @@ -230,11 +229,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { } } - // Mark read-only variables which can be imported with specific attribute. - // We can't internalize them now because IRMover will fail to link variable - // definitions to their external declarations during ThinLTO import. We'll - // internalize read-only variables later, after import is finished. - // See internalizeImmutableGVs. + // Mark read/write-only variables which can be imported with specific + // attribute. We can't internalize them now because IRMover will fail + // to link variable definitions to their external declarations during + // ThinLTO import. We'll internalize read-only variables later, after + // import is finished. See internalizeGVsAfterImport. // // If global value dead stripping is not enabled in summary then // propagateConstants hasn't been run. We can't internalize GV @@ -242,7 +241,8 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) { const auto &SL = VI.getSummaryList(); auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get()); - if (GVS && GVS->isReadOnly()) + // At this stage "maybe" is "definitely" + if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly())) cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize"); } diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index ff6970db47da..a2942869130d 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -1,9 +1,8 @@ //===-- GlobalStatus.cpp - Compute status info for globals -----------------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp index 08de0a4c53e9..34c32d9c0c98 100644 --- a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp @@ -1,9 +1,8 @@ //===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Utils that are used to perform transformations related to guards and their diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index 02482c550321..8041e66e6c4c 100644 --- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -1,9 +1,8 @@ //===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Generating inliner statistics for imported functions, mostly useful for diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 623fe91a5a60..a7f0f7ac5d61 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1,9 +1,8 @@ //===- InlineFunction.cpp - Code to perform function inlining -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -85,16 +84,10 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::init(true), cl::Hidden, cl::desc("Convert align attributes to assumptions during inlining.")); -llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, - bool InsertLifetime) { - return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); -} - -llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, +llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { - return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); + return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime); } namespace { @@ -1042,11 +1035,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, SmallSetVector<const Argument *, 4> NAPtrArgs; for (const Value *V : PtrArgs) { - SmallVector<Value *, 4> Objects; - GetUnderlyingObjects(const_cast<Value*>(V), - Objects, DL, /* LI = */ nullptr); + SmallVector<const Value *, 4> Objects; + GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr); - for (Value *O : Objects) + for (const Value *O : Objects) ObjSet.insert(O); } @@ -1216,14 +1208,14 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. 
-  Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+  auto *NewCall = dyn_cast<CallBase>(VMI->second);
   if (!NewCall)
     continue;
 
   // We do not treat intrinsic calls like real function calls because we
   // expect them to become inline code; do not add an edge for an intrinsic.
-  CallSite CS = CallSite(NewCall);
-  if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+  if (NewCall->getCalledFunction() &&
+      NewCall->getCalledFunction()->isIntrinsic())
     continue;
 
   // Remember that this call site got inlined for the client of
@@ -1236,19 +1228,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   // destination. This can also happen if the call graph node of the caller
   // was just unnecessarily imprecise.
   if (!I->second->getFunction())
-    if (Function *F = CallSite(NewCall).getCalledFunction()) {
+    if (Function *F = NewCall->getCalledFunction()) {
       // Indirect call site resolved to direct call.
-      CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+      CallerNode->addCalledFunction(NewCall, CG[F]);
 
       continue;
     }
 
-  CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+  CallerNode->addCalledFunction(NewCall, I->second);
 }
 
 // Update the call graph by deleting the edge from Callee to Caller. We must
 // do this after the loop above in case Caller and Callee are the same.
-  CallerNode->removeCallEdgeFor(CS);
+  CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
 }
 
 static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
@@ -1353,6 +1345,44 @@ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
   return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
 }
 
+/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
+/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
+static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
+                               LLVMContext &Ctx,
+                               DenseMap<const MDNode *, MDNode *> &IANodes) {
+  auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
+  return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
+                       IA);
+}
+
+/// Returns the LoopID for a loop which has been cloned from another
+/// function for inlining with the new inlined-at start and end locs.
+static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
+                            LLVMContext &Ctx,
+                            DenseMap<const MDNode *, MDNode *> &IANodes) {
+  assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
+         "Loop ID needs at least one operand");
+  assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
+         "Loop ID should refer to itself");
+
+  // Save space for the self-referential LoopID.
+  SmallVector<Metadata *, 4> MDs = {nullptr};
+
+  for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
+    Metadata *MD = OrigLoopId->getOperand(i);
+    // Update the DILocations to encode the inlined-at metadata.
+    if (DILocation *DL = dyn_cast<DILocation>(MD))
+      MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
+    else
+      MDs.push_back(MD);
+  }
+
+  MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
+  // Insert the self-referential LoopID.
+  NewLoopID->replaceOperandWith(0, NewLoopID);
+  return NewLoopID;
+}
+
 /// Update inlined instructions' line numbers
 /// to encode location where these instructions are inlined.
 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1378,10 +1408,17 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   for (; FI != Fn->end(); ++FI) {
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
          BI != BE; ++BI) {
+      // Loop metadata needs to be updated so that the start and end locs
+      // reference inlined-at locations.
+      if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
+        MDNode *NewLoopID =
+            inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
+        BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
+      }
+
       if (DebugLoc DL = BI->getDebugLoc()) {
-        auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
-                                            IANodes);
-        auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+        DebugLoc IDL =
+            inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
         BI->setDebugLoc(IDL);
         continue;
       }
@@ -1448,47 +1485,45 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
       CalleeEntryCount.getCount() < 1)
     return;
   auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
-  uint64_t CallCount =
+  int64_t CallCount =
       std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
                CalleeEntryCount.getCount());
-
-  for (auto const &Entry : VMap)
-    if (isa<CallInst>(Entry.first))
-      if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
-        CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());
-  for (BasicBlock &BB : *Callee)
-    // No need to update the callsite if it is pruned during inlining.
-    if (VMap.count(&BB))
-      for (Instruction &I : BB)
-        if (CallInst *CI = dyn_cast<CallInst>(&I))
-          CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount,
-                               CalleeEntryCount.getCount());
+  updateProfileCallee(Callee, -CallCount, &VMap);
 }
 
-/// Update the entry count of callee after inlining.
-///
-/// The callsite's block count is subtracted from the callee's function entry
-/// count.
-static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
-                              Instruction *CallInst, Function *Callee,
-                              ProfileSummaryInfo *PSI) {
-  // If the callee has a original count of N, and the estimated count of
-  // callsite is M, the new callee count is set to N - M. M is estimated from
-  // the caller's entry count, its entry block frequency and the block frequency
-  // of the callsite.
+void llvm::updateProfileCallee(
+    Function *Callee, int64_t entryDelta,
+    const ValueMap<const Value *, WeakTrackingVH> *VMap) {
   auto CalleeCount = Callee->getEntryCount();
-  if (!CalleeCount.hasValue() || !PSI)
-    return;
-  auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);
-  if (!CallCount.hasValue())
+  if (!CalleeCount.hasValue())
     return;
+
+  uint64_t priorEntryCount = CalleeCount.getCount();
+  uint64_t newEntryCount;
+
   // Since CallSiteCount is an estimate, it could exceed the original callee
-  // count and has to be set to 0.
-  if (CallCount.getValue() > CalleeCount.getCount())
-    CalleeCount.setCount(0);
+  // count and has to be set to 0, so guard against underflow.
+  if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
+    newEntryCount = 0;
  else
-    CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue());
-  Callee->setEntryCount(CalleeCount);
+    newEntryCount = priorEntryCount + entryDelta;
+
+  Callee->setEntryCount(newEntryCount);
+
+  // If this update is part of inlining, scale the profile weights of the
+  // call sites cloned into the caller.
+ if (VMap) { + uint64_t cloneEntryCount = priorEntryCount - newEntryCount; + for (auto const &Entry : *VMap) + if (isa<CallInst>(Entry.first)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + CI->updateProfWeight(cloneEntryCount, priorEntryCount); + } + for (BasicBlock &BB : *Callee) + // No need to update the callsite if it is pruned during inlining. + if (!VMap || VMap->count(&BB)) + for (Instruction &I : BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) + CI->updateProfWeight(newEntryCount, priorEntryCount); } /// This function inlines the called function into the basic block of the @@ -1507,6 +1542,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, assert(TheCall->getParent() && TheCall->getFunction() && "Instruction not in function!"); + // FIXME: we don't inline callbr yet. + if (isa<CallBrInst>(TheCall)) + return false; + // If IFI has any state in it, zap it before we fill it in. IFI.reset(); @@ -1684,8 +1723,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, IFI.PSI, IFI.CallerBFI); - // Update the profile count of callee. - updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); // Inject byval arguments initialization. for (std::pair<Value*, Value*> &Init : ByValInit) @@ -1734,6 +1771,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Instruction *NewI = nullptr; if (isa<CallInst>(I)) NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); + else if (isa<CallBrInst>(I)) + NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I); else NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); @@ -1817,8 +1856,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); for (auto &AI : IFI.StaticAllocas) - replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0, - DIExpression::NoDeref); + replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0); } SmallVector<Value*,4> VarArgsToForward; @@ -1869,10 +1907,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add VarArgs to existing parameters. SmallVector<Value *, 6> Params(CI->arg_operands()); Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); - CallInst *NewCI = - CallInst::Create(CI->getCalledFunction() ? 
CI->getCalledFunction() - : CI->getCalledValue(), - Params, "", CI); + CallInst *NewCI = CallInst::Create( + CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI); NewCI->setDebugLoc(CI->getDebugLoc()); NewCI->setAttributes(Attrs); NewCI->setCallingConv(CI->getCallingConv()); @@ -2038,6 +2074,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Instruction *NewInst; if (CS.isCall()) NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); + else if (CS.isCallBr()) + NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I); else NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); NewInst->takeName(I); diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 003721f2b939..6c4fc1ceb991 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -1,9 +1,8 @@ //===- InstructionNamer.cpp - Give anonymous instructions names -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 4a359b99bebd..9082049c82da 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -1,9 +1,8 @@ //===-- IntegerDivision.cpp - Expand integer division ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 53d444b309d5..29e7c5260f46 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -1,9 +1,8 @@ //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,11 +31,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -45,6 +45,7 @@ #include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -198,6 +199,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, continue; } + // If we added a single PHI, it must dominate all uses and we can directly + // rename it. + if (AddedPHIs.size() == 1) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + // We might call ValueIsRAUWd multiple times for the same value. + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]); + UseToRewrite->set(AddedPHIs[0]); + continue; + } + // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UseToRewrite); } @@ -211,9 +223,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, BasicBlock *UserBB = DVI->getParent(); if (InstBB == UserBB || L->contains(UserBB)) continue; - // We currently only handle debug values residing in blocks where we have - // inserted a PHI instruction. - if (Value *V = SSAUpdate.FindValueForBlock(UserBB)) + // We currently only handle debug values residing in blocks that were + // traversed while rewriting the uses. If we inserted just a single PHI, + // we will handle all relevant debug values. + Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0] + : SSAUpdate.FindValueForBlock(UserBB); + if (V) DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); } @@ -306,6 +321,12 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; +#ifdef EXPENSIVE_CHECKS + // Verify all sub-loops are in LCSSA form already. + for (Loop *SubLoop: L) + assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!"); +#endif + SmallVector<BasicBlock *, 8> ExitBlocks; L.getExitBlocks(ExitBlocks); if (ExitBlocks.empty()) @@ -325,6 +346,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, put them into the worklist to rewrite those uses. for (BasicBlock *BB : BlocksDominatingExits) { + // Skip blocks that are part of any sub-loops, they must be in LCSSA + // already. + if (LI->getLoopFor(BB) != &L) + continue; for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. 
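// A hedged usage sketch of the API touched above (illustrative only, not part
// of the patch): a transform that creates a use of a loop-defined value
// outside its loop can restore LCSSA for just the affected instructions. The
// names DefInsideLoop, DT (DominatorTree &) and LI (LoopInfo *) are assumed
// to exist in the surrounding pass context.
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(DefInsideLoop); // an instruction now used outside its loop
bool Changed = formLCSSAForInstructions(Worklist, DT, *LI);
// With the fast path added above, the case where exactly one PHI is inserted
// rewrites all uses through that PHI directly and notifies value handles via
// ValueHandleBase::ValueIsRAUWd, keeping SCEV's caches consistent.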
@@ -419,6 +444,8 @@ struct LCSSAWrapperPass : public FunctionPass { AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); // This is needed to perform LCSSA verification inside LPPassManager AU.addRequired<LCSSAVerificationPass>(); @@ -462,5 +489,9 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve<GlobalsAA>(); PA.preserve<SCEVAA>(); PA.preserve<ScalarEvolutionAnalysis>(); + // BPI maps terminators to probabilities, since we don't modify the CFG, no + // updates are needed to preserve it. + PA.preserve<BranchProbabilityAnalysis>(); + PA.preserve<MemorySSAAnalysis>(); return PA; } diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index e1592c867636..8c67d1dc6eb3 100644 --- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -1,9 +1,8 @@ //===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 499e611acb57..39b6b889f91c 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -1,9 +1,8 @@ //===- Local.cpp - Functions to perform local transformations -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -27,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -49,7 +49,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -92,6 +91,10 @@ using namespace llvm::PatternMatch; STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); +// Max recursion depth for collectBitParts used when detecting bswap and +// bitreverse idioms +static const unsigned BitPartRecursionMaxDepth = 64; + //===----------------------------------------------------------------------===// // Local constant propagation. 
// @@ -129,7 +132,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Builder.CreateBr(Destination); BI->eraseFromParent(); if (DTU) - DTU->deleteEdgeRelaxed(BB, OldDest); + DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}}); return true; } @@ -205,7 +208,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, i = SI->removeCase(i); e = SI->case_end(); if (DTU) - DTU->deleteEdgeRelaxed(ParentBB, DefaultDest); + DTU->applyUpdatesPermissive( + {{DominatorTree::Delete, ParentBB, DefaultDest}}); continue; } @@ -253,7 +257,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); if (DTU) - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); return true; } @@ -331,7 +335,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, } if (DTU) - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); return true; } } @@ -416,8 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); - if (CallSite CS = CallSite(I)) - if (isMathLibCallNoop(CS, TLI)) + if (auto *Call = dyn_cast<CallBase>(I)) + if (isMathLibCallNoop(Call, TLI)) return true; return false; @@ -430,7 +434,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, bool llvm::RecursivelyDeleteTriviallyDeadInstructions( Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) + if (!I || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -665,7 +669,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } if (DTU) - DTU->deleteEdgeRelaxed(Pred, BB); + DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}}); } /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its @@ -734,7 +738,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, isa<UnreachableInst>(PredBB->getTerminator()) && "The successor list of PredBB isn't empty before " "applying corresponding DTU updates."); - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); DTU->deleteBB(PredBB); // Recalculation of DomTree is needed when updating a forward DomTree and // the Entry BB is replaced. @@ -997,6 +1001,18 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, } } + // We cannot fold the block if it's a branch to an already present callbr + // successor because that creates duplicate successors. 
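+  // (Illustration of the hazard checked for below, with assumed IR: suppose
+  // a predecessor of %BB ends in
+  //     callbr void asm "", "X"(i8* blockaddress(@f, %Succ))
+  //         to label %BB [label %Succ]
+  // Folding the empty %BB into %Succ would leave %Succ as both the default
+  // and an indirect destination of the callbr, i.e. duplicate successors.)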
+ for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) { + if (Succ == CBI->getDefaultDest()) + return false; + for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) + if (Succ == CBI->getIndirectDest(i)) + return false; + } + } + LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); SmallVector<DominatorTree::UpdateType, 32> Updates; @@ -1064,7 +1080,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, "applying corresponding DTU updates."); if (DTU) { - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); DTU->deleteBB(BB); } else { BB->eraseFromParent(); // Delete the old basic block. @@ -1272,6 +1288,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { return false; } +/// Produce a DebugLoc to use for each dbg.declare/inst pair that are promoted +/// to a dbg.value. Because no machine insts can come from debug intrinsics, +/// only the scope and inlinedAt is significant. Zero line numbers are used in +/// case this DebugLoc leaks into any adjacent instructions. +static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { + // Original dbg.declare must have a location. + DebugLoc DeclareLoc = DII->getDebugLoc(); + MDNode *Scope = DeclareLoc.getScope(); + DILocation *InlinedAt = DeclareLoc.getInlinedAt(); + // Produce an unknown location with the correct scope / inlinedAt fields. + return DebugLoc::get(0, 0, Scope, InlinedAt); +} + /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, @@ -1280,9 +1309,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, auto *DIVar = DII->getVariable(); assert(DIVar && "Missing variable"); auto *DIExpr = DII->getExpression(); - Value *DV = SI->getOperand(0); + Value *DV = SI->getValueOperand(); + + DebugLoc NewLoc = getDebugValueLoc(DII, SI); - if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) { + if (!valueCoversEntireFragment(DV->getType(), DII)) { // FIXME: If storing to a part of the variable described by the dbg.declare, // then we want to insert a dbg.value for the corresponding fragment. LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " @@ -1292,14 +1323,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, // know nothing about the variable's content. DV = UndefValue::get(DV->getType()); if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), - SI); + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); return; } if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), - SI); + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1322,12 +1351,14 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, return; } + DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); + // We are now tracking the loaded value instead of the address. In the // future if multi-location support is added to the IR, it might be // preferable to keep tracking both the loaded value and the original // address in case the alloca can not be elided. 
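  // (Note on NewLoc, illustrative: getDebugValueLoc above deliberately builds
  // DebugLoc::get(0, 0, Scope, InlinedAt). Only the scope and the inlinedAt
  // chain are meaningful on a dbg.value, and line 0 keeps a stale line number
  // from leaking into adjacent instructions, where it could skew sample
  // profiles, if the location is ever copied.)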
Instruction *DbgValue = Builder.insertDbgValueIntrinsic( - LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr); + LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr); DbgValue->insertAfter(LI); } @@ -1354,12 +1385,13 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, BasicBlock *BB = APN->getParent(); auto InsertionPt = BB->getFirstInsertionPt(); + DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); + // The block may be a catchswitch block, which does not have a valid // insertion point. // FIXME: Insert dbg.value markers in the successors when appropriate. if (InsertionPt != BB->end()) - Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(), - &*InsertionPt); + Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt); } /// Determine whether this alloca is either a VLA or an array. @@ -1414,10 +1446,11 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that takes a // pointer to the variable. Insert a *value* intrinsic that describes // the variable by dereferencing the alloca. + DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr); auto *DerefExpr = DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); - DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, - DDI->getDebugLoc(), CI); + DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc, + CI); } } DDI->eraseFromParent(); @@ -1519,14 +1552,14 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers, bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, - bool DerefBefore, int Offset, bool DerefAfter) { + uint8_t DIExprFlags, int Offset) { auto DbgAddrs = FindDbgAddrUses(Address); for (DbgVariableIntrinsic *DII : DbgAddrs) { DebugLoc Loc = DII->getDebugLoc(); auto *DIVar = DII->getVariable(); auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); - DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); + DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset); // Insert llvm.dbg.declare immediately before InsertBefore, and remove old // llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1538,10 +1571,10 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool DerefBefore, - int Offset, bool DerefAfter) { + DIBuilder &Builder, uint8_t DIExprFlags, + int Offset) { return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, - DerefBefore, Offset, DerefAfter); + DIExprFlags, Offset); } static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, @@ -1594,120 +1627,119 @@ bool llvm::salvageDebugInfo(Instruction &I) { if (DbgUsers.empty()) return false; - auto &M = *I.getModule(); - auto &DL = M.getDataLayout(); + return salvageDebugInfoForDbgValues(I, DbgUsers); +} + +bool llvm::salvageDebugInfoForDbgValues( + Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) { auto &Ctx = I.getContext(); auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); }; - auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) { - auto *DIExpr = DII->getExpression(); - if (!Ops.empty()) { - // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they - // are implicitly pointing out the value as a DWARF memory location - // description. 
- bool WithStackValue = isa<DbgValueInst>(DII); - DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); - } + for (auto *DII : DbgUsers) { + // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they + // are implicitly pointing out the value as a DWARF memory location + // description. + bool StackValue = isa<DbgValueInst>(DII); + + DIExpression *DIExpr = + salvageDebugInfoImpl(I, DII->getExpression(), StackValue); + + // salvageDebugInfoImpl should fail on examining the first element of + // DbgUsers, or none of them. + if (!DIExpr) + return false; + DII->setOperand(0, wrapMD(I.getOperand(0))); DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); + } + + return true; +} + +DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, + DIExpression *SrcDIExpr, + bool WithStackValue) { + auto &M = *I.getModule(); + auto &DL = M.getDataLayout(); + + // Apply a vector of opcodes to the source DIExpression. + auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * { + DIExpression *DIExpr = SrcDIExpr; + if (!Ops.empty()) { + DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); + } + return DIExpr; }; - auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) { + // Apply the given offset to the source DIExpression. + auto applyOffset = [&](uint64_t Offset) -> DIExpression * { SmallVector<uint64_t, 8> Ops; DIExpression::appendOffset(Ops, Offset); - doSalvage(DII, Ops); + return doSalvage(Ops); }; - auto applyOps = [&](DbgVariableIntrinsic *DII, - std::initializer_list<uint64_t> Opcodes) { + // initializer-list helper for applying operators to the source DIExpression. + auto applyOps = + [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * { SmallVector<uint64_t, 8> Ops(Opcodes); - doSalvage(DII, Ops); + return doSalvage(Ops); }; if (auto *CI = dyn_cast<CastInst>(&I)) { - if (!CI->isNoopCast(DL)) - return false; - - // No-op casts are irrelevant for debug info. - MetadataAsValue *CastSrc = wrapMD(I.getOperand(0)); - for (auto *DII : DbgUsers) { - DII->setOperand(0, CastSrc); - LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); - } - return true; + // No-op casts and zexts are irrelevant for debug info. + if (CI->isNoopCast(DL) || isa<ZExtInst>(&I)) + return SrcDIExpr; + return nullptr; } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { unsigned BitWidth = M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); - // Rewrite a constant GEP into a DIExpression. Since we are performing - // arithmetic to compute the variable's *value* in the DIExpression, we - // need to mark the expression with a DW_OP_stack_value. + // Rewrite a constant GEP into a DIExpression. APInt Offset(BitWidth, 0); - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) - for (auto *DII : DbgUsers) - applyOffset(DII, Offset.getSExtValue()); - return true; + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { + return applyOffset(Offset.getSExtValue()); + } else { + return nullptr; + } } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { // Rewrite binary operations with constant integer operands. 
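  // (Worked example, assumed IR: if %add = add i64 %a, 16 is about to be
  // deleted, a user dbg.value(%add, !var, !DIExpression()) can be salvaged
  // into dbg.value(%a, !var, !DIExpression(DW_OP_plus_uconst, 16,
  // DW_OP_stack_value)). applyOffset below emits that additive form, and
  // applyOps covers the remaining opcodes in the switch.)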
auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)); if (!ConstInt || ConstInt->getBitWidth() > 64) - return false; + return nullptr; uint64_t Val = ConstInt->getSExtValue(); - for (auto *DII : DbgUsers) { - switch (BI->getOpcode()) { - case Instruction::Add: - applyOffset(DII, Val); - break; - case Instruction::Sub: - applyOffset(DII, -int64_t(Val)); - break; - case Instruction::Mul: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); - break; - case Instruction::SDiv: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); - break; - case Instruction::SRem: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); - break; - case Instruction::Or: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); - break; - case Instruction::And: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); - break; - case Instruction::Xor: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); - break; - case Instruction::Shl: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); - break; - case Instruction::LShr: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr}); - break; - case Instruction::AShr: - applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); - break; - default: - // TODO: Salvage constants from each kind of binop we know about. - return false; - } + switch (BI->getOpcode()) { + case Instruction::Add: + return applyOffset(Val); + case Instruction::Sub: + return applyOffset(-int64_t(Val)); + case Instruction::Mul: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); + case Instruction::SDiv: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); + case Instruction::SRem: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); + case Instruction::Or: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); + case Instruction::And: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); + case Instruction::Xor: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); + case Instruction::Shl: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); + case Instruction::LShr: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr}); + case Instruction::AShr: + return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); + default: + // TODO: Salvage constants from each kind of binop we know about. + return nullptr; } - return true; - } else if (isa<LoadInst>(&I)) { - MetadataAsValue *AddrMD = wrapMD(I.getOperand(0)); - for (auto *DII : DbgUsers) { - // Rewrite the load into DW_OP_deref. - auto *DIExpr = DII->getExpression(); - DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); - DII->setOperand(0, AddrMD); - DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); - LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); - } - return true; + // *Not* to do: we should not attempt to salvage load instructions, + // because the validity and lifetime of a dbg.value containing + // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. } - return false; + return nullptr; } /// A replacement for a dbg.value expression. @@ -1849,21 +1881,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, return None; bool Signed = *Signedness == DIBasicType::Signedness::Signed; - - if (!Signed) { - // In the unsigned case, assume that a debugger will initialize the - // high bits to 0 and do a no-op conversion. 
- return Identity(DII); - } else { - // In the signed case, the high bits are given by sign extension, i.e: - // (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1) - // Calculate the high bits and OR them together with the low bits. - SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu, - (ToBits - 1), dwarf::DW_OP_shr, - dwarf::DW_OP_lit0, dwarf::DW_OP_not, - dwarf::DW_OP_mul, dwarf::DW_OP_or}); - return DIExpression::appendToStack(DII.getExpression(), Ops); - } + dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned; + SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK, + dwarf::DW_OP_LLVM_convert, FromBits, TK}); + return DIExpression::appendToStack(DII.getExpression(), Ops); }; return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); } @@ -1894,10 +1915,14 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA, DomTreeUpdater *DTU) { + bool PreserveLCSSA, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { BasicBlock *BB = I->getParent(); std::vector <DominatorTree::UpdateType> Updates; + if (MSSAU) + MSSAU->changeToUnreachable(I); + // Loop over all of the successors, removing BB's entry from any PHI // nodes. if (DTU) @@ -1928,7 +1953,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, ++NumInstrsRemoved; } if (DTU) - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); return NumInstrsRemoved; } @@ -1937,8 +1962,8 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) { SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, - "", II); + CallInst *NewCall = CallInst::Create( + II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -1956,7 +1981,7 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) { UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); if (DTU) - DTU->deleteEdgeRelaxed(BB, UnwindDestBB); + DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}}); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -1981,8 +2006,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, // can potentially be avoided with a cleverer API design that we do not have // as of this time. - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, - InvokeArgs, OpBundles, CI->getName(), BB); + InvokeInst *II = + InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split, + UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -2052,7 +2078,7 @@ static bool markAliveBlocks(Function &F, Changed = true; break; } - if (CI->doesNotReturn()) { + if (CI->doesNotReturn() && !CI->isMustTailCall()) { // If we found a call to a no-return function, insert an unreachable // instruction after it. Make sure there isn't *already* one there // though. 
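// Aside on the new !CI->isMustTailCall() guard above (illustrative, with
// assumed IR): the verifier requires a musttail call to be followed by an
// optional bitcast and then a ret, e.g.
//     %r = musttail call i32 @callee(i32 %x)
//     ret i32 %r
// so even when @callee never returns, inserting unreachable after the call
// would produce invalid IR; such calls are now skipped.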
@@ -2102,7 +2128,8 @@ static bool markAliveBlocks(Function &F, UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); if (DTU) - DTU->deleteEdgeRelaxed(BB, UnwindDestBB); + DTU->applyUpdatesPermissive( + {{DominatorTree::Delete, BB, UnwindDestBB}}); } else changeToCall(II, DTU); Changed = true; @@ -2191,7 +2218,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); if (DTU) - DTU->deleteEdgeRelaxed(BB, UnwindDest); + DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}}); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even @@ -2211,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, assert(Reachable.size() < F.size()); NumRemoved += F.size()-Reachable.size(); - SmallPtrSet<BasicBlock *, 16> DeadBlockSet; + SmallSetVector<BasicBlock *, 8> DeadBlockSet; for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { auto *BB = &*I; if (Reachable.count(BB)) @@ -2256,7 +2283,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, } if (DTU) { - DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + DTU->applyUpdatesPermissive(Updates); bool Deleted = false; for (auto *BB : DeadBlockSet) { if (DTU->isBBPendingDeletion(BB)) @@ -2450,12 +2477,12 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); } -bool llvm::callsGCLeafFunction(ImmutableCallSite CS, +bool llvm::callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI) { // Check if the function is specifically marked as a gc leaf function. - if (CS.hasFnAttr("gc-leaf-function")) + if (Call->hasFnAttr("gc-leaf-function")) return true; - if (const Function *F = CS.getCalledFunction()) { + if (const Function *F = Call->getCalledFunction()) { if (F->hasFnAttribute("gc-leaf-function")) return true; @@ -2469,7 +2496,7 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS, // marked as 'gc-leaf-function.' All available Libcalls are // GC-leaf. LibFunc LF; - if (TLI.getLibFunc(CS, LF)) { + if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) { return TLI.has(LF); } @@ -2530,13 +2557,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB) { // Since we are moving the instructions out of its basic block, we do not // retain their original debug locations (DILocations) and debug intrinsic - // instructions (dbg.values). + // instructions. // // Doing so would degrade the debugging experience and adversely affect the // accuracy of profiling information. // // Currently, when hoisting the instructions, we take the following actions: - // - Remove their dbg.values. + // - Remove their debug intrinsic instructions. // - Set their debug locations to the values from the insertion point. // // As per PR39141 (comment #8), the more fundamental reason why the dbg.values @@ -2554,7 +2581,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, I->dropUnknownNonDebugMetadata(); if (I->isUsedByMetadata()) dropDebugUsers(*I); - if (isa<DbgVariableIntrinsic>(I)) { + if (isa<DbgInfoIntrinsic>(I)) { // Remove DbgInfo Intrinsics. II = I->eraseFromParent(); continue; @@ -2613,7 +2640,7 @@ struct BitPart { /// does not invalidate internal references (std::map instead of DenseMap). 
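/// (Illustration, assuming a 32-bit input: for the classic bswap idiom
///   (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24)
/// the provenance computed for the result is [24..31, 16..23, 8..15, 0..7],
/// that is, result bits 0..7 come from source bits 24..31 and so on, which
/// recognizeBSwapOrBitReverseIdiom then matches as a byte swap.)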
static const Optional<BitPart> & collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, - std::map<Value *, Optional<BitPart>> &BPS) { + std::map<Value *, Optional<BitPart>> &BPS, int Depth) { auto I = BPS.find(V); if (I != BPS.end()) return I->second; @@ -2621,13 +2648,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, auto &Result = BPS[V] = None; auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + // Prevent stack overflow by limiting the recursion depth + if (Depth == BitPartRecursionMaxDepth) { + LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n"); + return Result; + } + if (Instruction *I = dyn_cast<Instruction>(V)) { // If this is an or instruction, it may be an inner node of the bswap. if (I->getOpcode() == Instruction::Or) { auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS); + MatchBitReversals, BPS, Depth + 1); auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, - MatchBitReversals, BPS); + MatchBitReversals, BPS, Depth + 1); if (!A || !B) return Result; @@ -2660,7 +2693,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS); + MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2692,7 +2725,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, return Result; auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS); + MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; Result = Res; @@ -2707,7 +2740,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a zext instruction zero extend the result. if (I->getOpcode() == Instruction::ZExt) { auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, - MatchBitReversals, BPS); + MatchBitReversals, BPS, Depth + 1); if (!Res) return Result; @@ -2769,7 +2802,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( // Try to find all the pieces corresponding to the bswap. std::map<Value *, Optional<BitPart>> BPS; - auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); + auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); if (!Res) return false; auto &BitProvenance = Res->Provenance; @@ -2883,3 +2916,41 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { return true; } } + +using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>; +AllocaInst *llvm::findAllocaForValue(Value *V, + AllocaForValueMapTy &AllocaForValue) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) + return AI; + // See if we've already calculated (or started to calculate) alloca for a + // given value. + AllocaForValueMapTy::iterator I = AllocaForValue.find(V); + if (I != AllocaForValue.end()) + return I->second; + // Store 0 while we're calculating alloca for value V to avoid + // infinite recursion if the value references itself. + AllocaForValue[V] = nullptr; + AllocaInst *Res = nullptr; + if (CastInst *CI = dyn_cast<CastInst>(V)) + Res = findAllocaForValue(CI->getOperand(0), AllocaForValue); + else if (PHINode *PN = dyn_cast<PHINode>(V)) { + for (Value *IncValue : PN->incoming_values()) { + // Allow self-referencing phi-nodes. + if (IncValue == PN) + continue; + AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue); + // AI for incoming values should exist and should all be equal. 
+ if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res)) + return nullptr; + Res = IncValueAI; + } + } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) { + Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue); + } else { + LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: " + << *V << "\n"); + } + if (Res) + AllocaForValue[V] = Res; + return Res; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 41f14a834617..37389a695b45 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -1,9 +1,8 @@ //===----------------- LoopRotationUtils.cpp -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,6 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" @@ -28,7 +28,6 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -296,7 +295,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Begin by walking OrigHeader and populating ValueMap with an entry for // each Instruction. BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - ValueToValueMapTy ValueMap; + ValueToValueMapTy ValueMap, ValueMapMSSA; // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. @@ -375,6 +374,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (auto *II = dyn_cast<IntrinsicInst>(C)) if (II->getIntrinsicID() == Intrinsic::assume) AC->registerAssumption(II); + // MemorySSA cares whether the cloned instruction was inserted or not, and + // not whether it can be remapped to a simplified value. + ValueMapMSSA[Inst] = C; } } @@ -392,10 +394,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { LoopEntryBranch->eraseFromParent(); // Update MemorySSA before the rewrite call below changes the 1:1 - // instruction:cloned_instruction_or_value mapping in ValueMap. + // instruction:cloned_instruction_or_value mapping. if (MSSAU) { - ValueMap[OrigHeader] = OrigPreheader; - MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap); + ValueMapMSSA[OrigHeader] = OrigPreheader; + MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, + ValueMapMSSA); } SmallVector<PHINode*, 2> InsertedPHIs; @@ -463,9 +466,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (BasicBlock *ExitPred : ExitPreds) { // We only need to split loop exit edges. 
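    // (Context for the indirect-terminator check below, illustrative: edges
    // out of a block ending in an indirectbr, or now a callbr, cannot be
    // split, because the destination's address may be taken via blockaddress
    // and a computed jump would keep landing on the original block rather
    // than on any newly inserted one.)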
Loop *PredLoop = LI->getLoopFor(ExitPred); - if (!PredLoop || PredLoop->contains(Exit)) - continue; - if (isa<IndirectBrInst>(ExitPred->getTerminator())) + if (!PredLoop || PredLoop->contains(Exit) || + ExitPred->getTerminator()->isIndirectTerminator()) continue; SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 380f4fca54d9..7e6da02d5707 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -1,9 +1,8 @@ //===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,6 +27,9 @@ // to transform the loop and make these guarantees. Client code should check // that these conditions are true before relying on them. // +// Similar complications arise from callbr instructions, particularly in +// asm-goto where blockaddress expressions are used. +// // Note that the simplifycfg pass will clean up blocks which are split out but // end up being unnecessary, so usage of this pass should not pessimize // generated code. @@ -46,13 +48,15 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -67,6 +71,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -115,7 +120,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, /// preheader insertion and analysis updating. /// BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { BasicBlock *Header = L->getHeader(); // Compute the set of predecessors of the loop that are not in the loop. @@ -124,10 +130,11 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, PI != PE; ++PI) { BasicBlock *P = *PI; if (!L->contains(P)) { // Coming in from outside the loop? - // If the loop is branched to from an indirect branch, we won't + // If the loop is branched to from an indirect terminator, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; + if (P->getTerminator()->isIndirectTerminator()) + return nullptr; // Keep track of it. 
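      // (All such outside predecessors are collected so that the call to
      // SplitBlockPredecessors below can route them through one new
      // ".preheader" block; the MemorySSAUpdater now threaded through this
      // path keeps MemorySSA's memory PHIs consistent with that CFG edit.)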
OutsideBlocks.push_back(P); @@ -137,7 +144,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, // Split out the loop pre-header. BasicBlock *PreheaderBB; PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, - LI, nullptr, PreserveLCSSA); + LI, MSSAU, PreserveLCSSA); if (!PreheaderBB) return nullptr; @@ -217,7 +224,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, bool PreserveLCSSA, - AssumptionCache *AC) { + AssumptionCache *AC, MemorySSAUpdater *MSSAU) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; @@ -236,8 +243,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirectbr edges. - if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + // We can't split indirect control flow edges. + if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } @@ -251,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, SE->forgetLoop(L); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - DT, LI, nullptr, PreserveLCSSA); + DT, LI, MSSAU, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -314,7 +321,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Split edges to exit blocks from the inner loop, if they emerged in the // process of separating the outer one. - formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA); if (PreserveLCSSA) { // Fix LCSSA form for L. Some values, which previously were only used inside @@ -339,7 +346,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - DominatorTree *DT, LoopInfo *LI) { + DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop @@ -358,8 +366,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; - // Indirectbr edges cannot be split, so we must fail if we find one. - if (isa<IndirectBrInst>(P->getTerminator())) + // Indirect edges cannot be split, so we must fail if we find one. 
+ if (P->getTerminator()->isIndirectTerminator()) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); @@ -439,9 +447,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (!LoopMD) LoopMD = TI->getMetadata(LoopMDKind); TI->setMetadata(LoopMDKind, nullptr); - for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) - if (TI->getSuccessor(Op) == Header) - TI->setSuccessor(Op, BEBlock); + TI->replaceSuccessorWith(Header, BEBlock); } BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD); @@ -454,6 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Update dominator information DT->splitBlock(BEBlock); + if (MSSAU) + MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader, + BEBlock); + return BEBlock; } @@ -461,8 +471,11 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, - bool PreserveLCSSA) { + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { bool Changed = false; + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + ReprocessLoop: // Check to see that no blocks (other than the header) in this loop have @@ -489,11 +502,15 @@ ReprocessLoop: // Zap the dead pred's terminator and replace it with unreachable. Instruction *TI = P->getTerminator(); - changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA); + changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA, + /*DTU=*/nullptr, MSSAU); Changed = true; } } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // If there are exiting blocks with branches on undef, resolve the undef in // the direction which will exit the loop. This will help simplify loop // trip count computations. @@ -518,7 +535,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); + Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA); if (Preheader) Changed = true; } @@ -527,9 +544,12 @@ ReprocessLoop: // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. - if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA)) + if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA)) Changed = true; + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. BasicBlock *LoopLatch = L->getLoopLatch(); @@ -538,8 +558,8 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (Loop *OuterL = - separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { + if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, + PreserveLCSSA, AC, MSSAU)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -556,11 +576,14 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. 
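  // (Shape produced, illustrative:
  //
  //     latch1 --+                    latch1 --+
  //              +--> header    =>             +--> BEBlock --> header
  //     latch2 --+                    latch2 --+
  //
  // leaving the loop with a single latch, the form most loop passes expect;
  // updatePhisWhenInsertingUniqueBackedgeBlock above is what keeps
  // MemorySSA's header PHIs in step when an updater is supplied.)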
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU); if (LoopLatch) Changed = true; } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Scan over the PHI nodes in the loop header. Since they now have only two @@ -618,9 +641,9 @@ ReprocessLoop: Instruction *Inst = &*I++; if (Inst == CI) continue; - if (!L->makeLoopInvariant(Inst, AnyInvariant, - Preheader ? Preheader->getTerminator() - : nullptr)) { + if (!L->makeLoopInvariant( + Inst, AnyInvariant, + Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) { AllInvariant = false; break; } @@ -637,7 +660,7 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI)) + if (!FoldBranchToCommonDest(BI, MSSAU)) continue; // Success. The block is now dead, so remove it from the loop, @@ -657,11 +680,16 @@ ReprocessLoop: DT->changeImmediateDominator(Child, Node->getIDom()); } DT->eraseNode(ExitingBlock); + if (MSSAU) { + SmallSetVector<BasicBlock *, 8> ExitBlockSet; + ExitBlockSet.insert(ExitingBlock); + MSSAU->removeBlocks(ExitBlockSet); + } BI->getSuccessor(0)->removePredecessor( - ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); + ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); BI->getSuccessor(1)->removePredecessor( - ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); + ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA); ExitingBlock->eraseFromParent(); } } @@ -672,12 +700,15 @@ ReprocessLoop: if (Changed && SE) SE->forgetTopmostLoop(L); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + return Changed; } bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, - bool PreserveLCSSA) { + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { bool Changed = false; #ifndef NDEBUG @@ -705,7 +736,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, - AC, PreserveLCSSA); + AC, MSSAU, PreserveLCSSA); return Changed; } @@ -737,6 +768,9 @@ namespace { AU.addPreservedID(LCSSAID); AU.addPreserved<DependenceAnalysisWrapperPass>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + if (EnableMSSALoopDependency) + AU.addPreserved<MemorySSAWrapperPass>(); } /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. @@ -768,12 +802,21 @@ bool LoopSimplify::runOnFunction(Function &F) { ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + MemorySSA *MSSA = nullptr; + std::unique_ptr<MemorySSAUpdater> MSSAU; + if (EnableMSSALoopDependency) { + auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + if (MSSAAnalysis) { + MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = make_unique<MemorySSAUpdater>(MSSA); + } + } bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. 
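  // (Note on the MemorySSA wiring above, illustrative: the updater is only
  // constructed when EnableMSSALoopDependency is set and some earlier pass
  // already computed MemorySSA, via getAnalysisIfAvailable, so pipelines
  // that never requested MemorySSA pay no update cost here.)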
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA); #ifndef NDEBUG if (PreserveLCSSA) { @@ -794,9 +837,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA - // after simplifying the loops. + // after simplifying the loops. MemorySSA is not preserved either. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false); + Changed |= + simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); @@ -809,6 +853,12 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, PA.preserve<SCEVAA>(); PA.preserve<ScalarEvolutionAnalysis>(); PA.preserve<DependenceAnalysis>(); + // BPI maps conditional terminators to probabilities, LoopSimplify can insert + // blocks, but it does so only by splitting existing blocks and edges. This + // results in the interesting property that all new terminators inserted are + // unconditional branches which do not appear in BPI. All deletions are + // handled via ValueHandle callbacks w/in BPI. + PA.preserve<BranchProbabilityAnalysis>(); return PA; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index da7ed2bd1652..e39ade523714 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1,9 +1,8 @@ //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -45,6 +44,8 @@ using namespace llvm; // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); +STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a " + "conditional latch (completely or otherwise)"); static cl::opt<bool> UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, @@ -94,66 +95,6 @@ void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { } } -/// Folds a basic block into its predecessor if it only has one predecessor, and -/// that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. -BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, - ScalarEvolution *SE, - DominatorTree *DT) { - // Merge basic blocks into their predecessor if there is only one distinct - // pred, and if there is only one distinct successor of the predecessor, and - // if there are no PHI nodes. 
- BasicBlock *OnlyPred = BB->getSinglePredecessor(); - if (!OnlyPred) return nullptr; - - if (OnlyPred->getTerminator()->getNumSuccessors() != 1) - return nullptr; - - LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into " - << OnlyPred->getName() << "\n"); - - // Resolve any PHI nodes at the start of the block. They are all - // guaranteed to have exactly one entry if they exist, unless there are - // multiple duplicate (but guaranteed to be equal) entries for the - // incoming edges. This occurs when there are multiple edges from - // OnlyPred to OnlySucc. - FoldSingleEntryPHINodes(BB); - - // Delete the unconditional branch from the predecessor... - OnlyPred->getInstList().pop_back(); - - // Make all PHI nodes that referred to BB now refer to Pred as their - // source... - BB->replaceAllUsesWith(OnlyPred); - - // Move all definitions in the successor to the predecessor... - OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); - - // OldName will be valid until erased. - StringRef OldName = BB->getName(); - - // Erase the old block and update dominator info. - if (DT) - if (DomTreeNode *DTN = DT->getNode(BB)) { - DomTreeNode *PredDTN = DT->getNode(OnlyPred); - SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); - for (auto *DI : Children) - DT->changeImmediateDominator(DI, PredDTN); - - DT->eraseNode(BB); - } - - LI->removeBlock(BB); - - // Inherit predecessor's name if it exists... - if (!OldName.empty() && !OnlyPred->hasName()) - OnlyPred->setName(OldName); - - BB->eraseFromParent(); - - return OnlyPred; -} - /// Check if unrolling created a situation where we need to insert phi nodes to /// preserve LCSSA form. /// \param Blocks is a vector of basic blocks representing unrolled loop. @@ -332,12 +273,11 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, /// /// If RemainderLoop is non-null, it will receive the remainder loop (if /// required and not fully unrolled). -LoopUnrollResult llvm::UnrollLoop( - Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, - bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, - unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, - LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, - OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { +LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, + OptimizationRemarkEmitter *ORE, + bool PreserveLCSSA, Loop **RemainderLoop) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { @@ -357,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop( return LoopUnrollResult::Unmodified; } - // The current loop unroll pass can only unroll loops with a single latch + // The current loop unroll pass can unroll loops with a single latch or header // that's a conditional branch exiting the loop. // FIXME: The implementation can be extended to work with more complicated // cases, e.g. loops with multiple latches. BasicBlock *Header = L->getHeader(); + BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator()); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - if (!BI || BI->isUnconditional()) { - // The loop-rotate pass can be helpful to avoid this in many cases. + // FIXME: Support loops without conditional latch and multiple exiting blocks. 
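  // (The two loop shapes accepted by the checks below, illustrative: either
  // the latch ends in a conditional branch that exits the loop, the rotated
  // do-while form, or the latch branch is unconditional and the header holds
  // the loop's exiting conditional branch, the while form; anything else is
  // still rejected, per the FIXME above.)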
+ if (!BI || + (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() || + L->getExitingBlock() != Header))) { + LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional " + "branch in the latch or header.\n"); + return LoopUnrollResult::Unmodified; + } + + auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) { + return BI->isConditional() && BI->getSuccessor(S1) == Header && + !L->contains(BI->getSuccessor(S2)); + }; + + // If we have a conditional latch, it must exit the loop. + if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) && + !CheckLatchSuccessors(1, 0)) { LLVM_DEBUG( - dbgs() - << " Can't unroll; loop not terminated by a conditional branch.\n"); + dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; } - auto CheckSuccessors = [&](unsigned S1, unsigned S2) { - return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) { + return HeaderBI && HeaderBI->isConditional() && + L->contains(HeaderBI->getSuccessor(S1)) && + !L->contains(HeaderBI->getSuccessor(S2)); }; - if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { - LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" - " exiting the loop can be unrolled\n"); + // If we do not have a conditional latch, the header must exit the loop. + if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() && + !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) { + LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop"); return LoopUnrollResult::Unmodified; } @@ -389,28 +347,28 @@ LoopUnrollResult llvm::UnrollLoop( return LoopUnrollResult::Unmodified; } - if (TripCount != 0) - LLVM_DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); - if (TripMultiple != 1) - LLVM_DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); + if (ULO.TripCount != 0) + LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n"); + if (ULO.TripMultiple != 1) + LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. - if (TripCount != 0 && Count > TripCount) - Count = TripCount; + if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount) + ULO.Count = ULO.TripCount; // Don't enter the unroll code if there is nothing to do. - if (TripCount == 0 && Count < 2 && PeelCount == 0) { + if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); return LoopUnrollResult::Unmodified; } - assert(Count > 0); - assert(TripMultiple > 0); - assert(TripCount == 0 || TripCount % TripMultiple == 0); + assert(ULO.Count > 0); + assert(ULO.TripMultiple > 0); + assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); // Are we eliminating the loop control altogether? - bool CompletelyUnroll = Count == TripCount; + bool CompletelyUnroll = ULO.Count == ULO.TripCount; SmallVector<BasicBlock *, 4> ExitBlocks; L->getExitBlocks(ExitBlocks); std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks(); @@ -429,24 +387,29 @@ LoopUnrollResult llvm::UnrollLoop( // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. 
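  // (Illustrative: run-time trip-count unrolling keeps the unroll factor but
  // emits a remainder loop via UnrollRuntimeLoopRemainder, as an epilog when
  // isEpilogProfitable agrees, which executes the TripCount % Count leftover
  // iterations computed at run time.)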
-  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+  bool RuntimeTripCount =
+      (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
 
-  assert((!RuntimeTripCount || !PeelCount) &&
+  assert((!RuntimeTripCount || !ULO.PeelCount) &&
          "Did not expect runtime trip-count unrolling "
          "and peeling for the same loop");
 
   bool Peeled = false;
-  if (PeelCount) {
-    Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+  if (ULO.PeelCount) {
+    Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
 
     // Successful peeling may result in a change in the loop preheader/trip
     // counts. If we later unroll the loop, we want these to be updated.
     if (Peeled) {
-      BasicBlock *ExitingBlock = L->getExitingBlock();
+      // According to our guards and profitability checks, the only
+      // meaningful exit should be the latch block. Other exits go to deopt,
+      // so we do not worry about them.
+      BasicBlock *ExitingBlock = L->getLoopLatch();
       assert(ExitingBlock && "Loop without exiting block?");
+      assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
       Preheader = L->getLoopPreheader();
-      TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
-      TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+      ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+      ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
     }
   }
 
@@ -459,7 +422,7 @@ LoopUnrollResult llvm::UnrollLoop(
     for (auto &I : *BB)
       if (auto CS = CallSite(&I))
         HasConvergent |= CS.isConvergent();
-    assert((!HasConvergent || TripMultiple % Count == 0) &&
+    assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
            "Unroll count must divide trip multiple if loop contains a "
            "convergent operation.");
   });
@@ -468,11 +431,12 @@
   bool EpilogProfitability =
       UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
                                               : isEpilogProfitable(L);
 
-  if (RuntimeTripCount && TripMultiple % Count != 0 &&
-      !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  EpilogProfitability, UnrollRemainder, LI, SE,
-                                  DT, AC, PreserveLCSSA, RemainderLoop)) {
-    if (Force)
+  if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+      !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
+                                  EpilogProfitability, ULO.UnrollRemainder,
+                                  ULO.ForgetAllSCEV, LI, SE, DT, AC,
+                                  PreserveLCSSA, RemainderLoop)) {
+    if (ULO.Force)
       RuntimeTripCount = false;
     else {
       LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
@@ -483,35 +447,35 @@ LoopUnrollResult llvm::UnrollLoop(
 
   // If we know the trip count, we know the multiple...
   unsigned BreakoutTrip = 0;
-  if (TripCount != 0) {
-    BreakoutTrip = TripCount % Count;
-    TripMultiple = 0;
+  if (ULO.TripCount != 0) {
+    BreakoutTrip = ULO.TripCount % ULO.Count;
+    ULO.TripMultiple = 0;
   } else {
     // Figure out what multiple to use.
-    BreakoutTrip = TripMultiple =
-        (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+    BreakoutTrip = ULO.TripMultiple =
+        (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
   }
 
   using namespace ore;
   // Report the unrolling decision.
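
[Illustrative sketch, not part of the patch: the BreakoutTrip computation just above, with worked numbers; std::gcd stands in for LLVM's GreatestCommonDivisor64.]

    #include <numeric>

    static unsigned breakoutTrip(unsigned Count, unsigned TripCount,
                                 unsigned &TripMultiple) {
      if (TripCount != 0) {
        // Known trip count: with TripCount = 10 and Count = 4, the unrolled
        // body exits early ("breaks out") after copy 10 % 4 = 2.
        TripMultiple = 0;
        return TripCount % Count;
      }
      // Unknown trip count: fall back to the multiple that the unroll factor
      // and the known trip multiple agree on, e.g. gcd(4, 6) = 2.
      TripMultiple = std::gcd(Count, TripMultiple);
      return TripMultiple;
    }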
if (CompletelyUnroll) { LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << TripCount << "!\n"); + << " with trip count " << ULO.TripCount << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), L->getHeader()) << "completely unrolled loop with " - << NV("UnrollCount", TripCount) << " iterations"; + << NV("UnrollCount", ULO.TripCount) << " iterations"; }); - } else if (PeelCount) { + } else if (ULO.PeelCount) { LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << PeelCount << "!\n"); + << " with iteration count " << ULO.PeelCount << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), L->getHeader()) - << " peeled loop by " << NV("PeelCount", PeelCount) + << " peeled loop by " << NV("PeelCount", ULO.PeelCount) << " iterations"; }); } else { @@ -519,24 +483,25 @@ LoopUnrollResult llvm::UnrollLoop( OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), L->getHeader()); return Diag << "unrolled loop by a factor of " - << NV("UnrollCount", Count); + << NV("UnrollCount", ULO.Count); }; LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " - << Count); - if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { + << ULO.Count); + if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) { LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); if (ORE) ORE->emit([&]() { return DiagBuilder() << " with a breakout at trip " << NV("BreakoutTrip", BreakoutTrip); }); - } else if (TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } else if (ULO.TripMultiple != 1) { + LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch"); if (ORE) ORE->emit([&]() { - return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) - << " trips per branch"; + return DiagBuilder() + << " with " << NV("TripMultiple", ULO.TripMultiple) + << " trips per branch"; }); } else if (RuntimeTripCount) { LLVM_DEBUG(dbgs() << " with run-time trip count"); @@ -555,11 +520,24 @@ LoopUnrollResult llvm::UnrollLoop( // and if something changes inside them then any of outer loops may also // change. When we forget outermost loop, we also forget all contained loops // and this is what we need here. - if (SE) - SE->forgetTopmostLoop(L); + if (SE) { + if (ULO.ForgetAllSCEV) + SE->forgetAllLoops(); + else + SE->forgetTopmostLoop(L); + } - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); - BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + bool ContinueOnTrue; + bool LatchIsExiting = BI->isConditional(); + BasicBlock *LoopExit = nullptr; + if (LatchIsExiting) { + ContinueOnTrue = L->contains(BI->getSuccessor(0)); + LoopExit = BI->getSuccessor(ContinueOnTrue); + } else { + NumUnrolledWithHeader++; + ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0)); + LoopExit = HeaderBI->getSuccessor(ContinueOnTrue); + } // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. 
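
[Illustrative aside, not part of the patch: the two loop shapes that the LatchIsExiting flag above distinguishes, sketched as CFGs.]

    //   exiting latch (rotated loop):      exiting header (while-style loop):
    //
    //     preheader                          preheader
    //         |                                  |
    //         v                                  v
    //   +-> header                        +-> header --(cond)--> LoopExit
    //   |     |                           |      |
    //   |    body                         |     body
    //   |     |                           |      |
    //   +-- latch --(cond)--> LoopExit    +--- latch  (unconditional branch)
    //
    // In the second shape the latch carries no exit test, so the replicated
    // per-iteration exit branch comes from HeaderBI instead of BI.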
@@ -569,11 +547,23 @@ LoopUnrollResult llvm::UnrollLoop( OrigPHINode.push_back(cast<PHINode>(I)); } - std::vector<BasicBlock*> Headers; - std::vector<BasicBlock*> Latches; + std::vector<BasicBlock *> Headers; + std::vector<BasicBlock *> HeaderSucc; + std::vector<BasicBlock *> Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); + if (!LatchIsExiting) { + auto *Term = cast<BranchInst>(Header->getTerminator()); + if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) { + assert(L->contains(Term->getSuccessor(0))); + HeaderSucc.push_back(Term->getSuccessor(0)); + } else { + assert(L->contains(Term->getSuccessor(1))); + HeaderSucc.push_back(Term->getSuccessor(1)); + } + } + // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. @@ -599,7 +589,7 @@ LoopUnrollResult llvm::UnrollLoop( for (Instruction &I : *BB) if (!isa<DbgInfoIntrinsic>(&I)) if (const DILocation *DIL = I.getDebugLoc()) { - auto NewDIL = DIL->cloneWithDuplicationFactor(Count); + auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count); if (NewDIL) I.setDebugLoc(NewDIL.getValue()); else @@ -608,7 +598,7 @@ LoopUnrollResult llvm::UnrollLoop( << DIL->getFilename() << " Line: " << DIL->getLine()); } - for (unsigned It = 1; It != Count; ++It) { + for (unsigned It = 1; It != ULO.Count; ++It) { std::vector<BasicBlock*> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; NewLoops[L] = L; @@ -663,6 +653,13 @@ LoopUnrollResult llvm::UnrollLoop( if (*BB == LatchBlock) Latches.push_back(New); + // Keep track of the successor of the new header in the current iteration. + for (auto *Pred : predecessors(*BB)) + if (Pred == Header) { + HeaderSucc.push_back(New); + break; + } + NewBlocks.push_back(New); UnrolledLoopBlocks.push_back(New); @@ -699,8 +696,7 @@ LoopUnrollResult llvm::UnrollLoop( if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); - } - else if (Count > 1) { + } else if (ULO.Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. @@ -713,39 +709,11 @@ LoopUnrollResult llvm::UnrollLoop( } } - // Now that all the basic blocks for the unrolled iterations are in place, - // set up the branches to connect them. - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The original branch was replicated in each unrolled iteration. - BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); - - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) { - NeedConditional = false; - } - - // For a complete unroll, make the last iteration end with a branch - // to the exit block. - if (CompletelyUnroll) { - if (j == 0) - Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to keep - // the conditional branch except the last one because the loop may exit - // after any iteration. - assert(NeedConditional && - "NeedCondition cannot be modified by both complete " - "unrolling and runtime unrolling"); - NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0)); - } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. 
-      NeedConditional = false;
-    }
-
+  auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
+                                            ArrayRef<BasicBlock *> NextBlocks,
+                                            BasicBlock *CurrentHeader,
+                                            bool NeedConditional) {
+    auto *Term = cast<BranchInst>(Src->getTerminator());
     if (NeedConditional) {
       // Update the conditional branch's successor for the following
       // iteration.
     } else {
       // Remove phi operands at this loop exit
       if (Dest != LoopExit) {
-        BasicBlock *BB = Latches[i];
-        for (BasicBlock *Succ: successors(BB)) {
-          if (Succ == Headers[i])
+        BasicBlock *BB = Src;
+        for (BasicBlock *Succ : successors(BB)) {
+          if (Succ == CurrentHeader)
             continue;
           for (PHINode &Phi : Succ->phis())
             Phi.removeIncomingValue(BB, false);
         }
       }
       BranchInst::Create(Dest, Term);
       Term->eraseFromParent();
     }
+  };
+
+  // Now that all the basic blocks for the unrolled iterations are in place,
+  // set up the branches to connect them.
+  if (LatchIsExiting) {
+    // Set up latches to branch to the new header in the unrolled iterations or
+    // the loop exit for the last latch in a fully unrolled loop.
+    for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+      // The branch destination.
+      unsigned j = (i + 1) % e;
+      BasicBlock *Dest = Headers[j];
+      bool NeedConditional = true;
+
+      if (RuntimeTripCount && j != 0) {
+        NeedConditional = false;
+      }
+
+      // For a complete unroll, make the last iteration end with a branch
+      // to the exit block.
+      if (CompletelyUnroll) {
+        if (j == 0)
+          Dest = LoopExit;
+        // If using trip count upper bound to completely unroll, we need to keep
+        // the conditional branch except the last one because the loop may exit
+        // after any iteration.
+        assert(NeedConditional &&
+               "NeedCondition cannot be modified by both complete "
+               "unrolling and runtime unrolling");
+        NeedConditional =
+            (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
+      } else if (j != BreakoutTrip &&
+                 (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
+        // If we know the trip count or a multiple of it, we can safely use an
+        // unconditional branch for some iterations.
+        NeedConditional = false;
+      }
+
+      setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
+    }
+  } else {
+    // Set up headers to branch to their new successors in the unrolled
+    // iterations.
+    for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+      // The branch destination.
+      unsigned j = (i + 1) % e;
+      BasicBlock *Dest = HeaderSucc[i];
+      bool NeedConditional = true;
+
+      if (RuntimeTripCount && j != 0)
+        NeedConditional = false;
+
+      if (CompletelyUnroll)
+        // We cannot drop the conditional branch for the last condition, as we
+        // may have to execute the loop body depending on the condition.
+        NeedConditional = j == 0 || ULO.PreserveCondBr;
+      else if (j != BreakoutTrip &&
+               (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
+        // If we know the trip count or a multiple of it, we can safely use an
+        // unconditional branch for some iterations.
+        NeedConditional = false;
+
+      setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+    }
+
+    // Set up latches to branch to the new header in the unrolled iterations or
+    // the loop exit for the last latch in a fully unrolled loop.
+
+    for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+      // The original branch was replicated in each unrolled iteration.
+      BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+      // The branch destination.
+ unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + + // When completely unrolling, the last latch becomes unreachable. + if (CompletelyUnroll && j == 0) + new UnreachableInst(Term->getContext(), Term); + else + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + + Term->eraseFromParent(); + } } // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied // iterations too. - if (DT && Count > 1) { + if (DT && ULO.Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); SmallVector<BasicBlock *, 16> ChildrenToUpdate; @@ -781,7 +833,9 @@ LoopUnrollResult llvm::UnrollLoop( ChildrenToUpdate.push_back(ChildBB); } BasicBlock *NewIDom; - if (BB == LatchBlock) { + BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header; + auto &TermBlocks = LatchIsExiting ? Latches : Headers; + if (BB == TermBlock) { // The latch is special because we emit unconditional branches in // some cases where the original loop contained a conditional branch. // Since the latch is always at the bottom of the loop, if the latch @@ -789,11 +843,13 @@ LoopUnrollResult llvm::UnrollLoop( // must also be a latch. Specifically, the dominator is the first // latch which ends in a conditional branch, or the last latch if // there is no such latch. - NewIDom = Latches.back(); - for (BasicBlock *IterLatch : Latches) { - Instruction *Term = IterLatch->getTerminator(); + // For loops exiting from the header, we limit the supported loops + // to have a single exiting block. + NewIDom = TermBlocks.back(); + for (BasicBlock *Iter : TermBlocks) { + Instruction *Term = Iter->getTerminator(); if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) { - NewIDom = IterLatch; + NewIDom = Iter; break; } } @@ -810,14 +866,20 @@ LoopUnrollResult llvm::UnrollLoop( } assert(!DT || !UnrollVerifyDomtree || - DT->verify(DominatorTree::VerificationLevel::Fast)); + DT->verify(DominatorTree::VerificationLevel::Fast)); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { - BranchInst *Term = cast<BranchInst>(Latch->getTerminator()); - if (Term->isUnconditional()) { + BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator()); + assert((Term || + (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && + "Need a branch as terminator, except when fully unrolling with " + "unconditional latch"); + if (Term && Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) { + BasicBlock *Fold = Dest->getUniquePredecessor(); + if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), @@ -829,8 +891,8 @@ LoopUnrollResult llvm::UnrollLoop( // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. 
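
[Illustrative sketch, not part of the patch: the branch wiring above reduces to one predicate per replicated exit branch; restated for the exiting-latch case, where i is the current iteration and j the next one.]

    static bool needConditional(unsigned i, unsigned j, bool RuntimeTripCount,
                                bool CompletelyUnroll, bool PreserveCondBr,
                                bool PreserveOnlyFirst, unsigned BreakoutTrip,
                                unsigned TripMultiple) {
      if (RuntimeTripCount && j != 0)
        return false; // the runtime remainder loop handles leftover iterations
      if (CompletelyUnroll)
        // Keep the test only where an upper-bound unroll might still exit early.
        return PreserveCondBr && j && !(PreserveOnlyFirst && i != 0);
      if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0))
        return false; // a known trip multiple makes the test redundant here
      return true;
    }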
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE, - DT, AC); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, + SE, DT, AC); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; @@ -878,11 +940,11 @@ LoopUnrollResult llvm::UnrollLoop( // TODO: That potentially might be compile-time expensive. We should try // to fix the loop-simplified form incrementally. - simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); + simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA); } else { // Simplify loops for which we might've broken loop-simplify form. for (Loop *SubLoop : LoopsToSimplify) - simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); + simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); } } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp index e26762639c13..ff49d83f25c5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -1,9 +1,8 @@ //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -198,8 +197,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop( if (TripMultiple == 1 || TripMultiple % Count != 0) { if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, /*UseEpilogRemainder*/ true, - UnrollRemainder, LI, SE, DT, AC, true, - EpilogueLoop)) { + UnrollRemainder, /*ForgetAllSCEV*/ false, + LI, SE, DT, AC, true, EpilogueLoop)) { LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " "generated when assuming runtime trip count\n"); return LoopUnrollResult::Unmodified; @@ -301,7 +300,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( for (Instruction &I : *BB) if (!isa<DbgInfoIntrinsic>(&I)) if (const DILocation *DIL = I.getDebugLoc()) { - auto NewDIL = DIL->cloneWithDuplicationFactor(Count); + auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count); if (NewDIL) I.setDebugLoc(NewDIL.getValue()); else @@ -539,12 +538,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop( MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); while (!MergeBlocks.empty()) { BasicBlock *BB = *MergeBlocks.begin(); BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) { + BasicBlock *Fold = Dest->getUniquePredecessor(); + if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { // Don't remove BB and add Fold as they are the same BB assert(Fold == BB); (void)Fold; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 151a285af4e9..005306cf1898 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ 
b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,9 +1,8 @@
 //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
 
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+    "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+    cl::desc("Allow peeling of loops with multiple deopt exits."));
+
 // Designates that a Phi is estimated to become invariant after an "infinite"
 // number of loop iterations (i.e. only may become an invariant if the loop is
 // fully unrolled).
@@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
   if (!L->isLoopSimplifyForm())
     return false;
 
+  if (UnrollPeelMultiDeoptExit) {
+    SmallVector<BasicBlock *, 4> Exits;
+    L->getUniqueNonLatchExitBlocks(Exits);
+
+    if (!Exits.empty()) {
+      // The latch's terminator must be a conditional branch, the latch must
+      // be exiting, and all non-latch exits must end with deoptimize.
+      const BasicBlock *Latch = L->getLoopLatch();
+      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+      return T && T->isConditional() && L->isLoopExiting(Latch) &&
+             all_of(Exits, [](const BasicBlock *BB) {
+               return BB->getTerminatingDeoptimizeCall();
+             });
+    }
+  }
+
   // Only peel loops that contain a single exit
   if (!L->getExitingBlock() || !L->getUniqueExitBlock())
     return false;
@@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
                                 unsigned IterNumber, unsigned AvgIters,
                                 uint64_t &PeeledHeaderWeight) {
+  if (!PeeledHeaderWeight)
+    return;
   // FIXME: Pick a more realistic distribution.
   // Currently the proportion of weight we assign to the fall-through
   // side of the branch drops linearly with the iteration number, and we use
   // a 0.9 fudge factor to make the drop-off less sharp...
-  if (PeeledHeaderWeight) {
-    uint64_t FallThruWeight =
-        PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
-    uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
-    PeeledHeaderWeight -= ExitWeight;
-
-    unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-    MDBuilder MDB(LatchBR->getContext());
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
-                  : MDB.createBranchWeights(FallThruWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  uint64_t FallThruWeight =
+      PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+  uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+  PeeledHeaderWeight -= ExitWeight;
+
+  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+  MDBuilder MDB(LatchBR->getContext());
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
                : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Initialize the weights.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
+/// \param[out] CurHeaderWeight The # of times the header is executed.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                              unsigned AvgIters, uint64_t &ExitWeight,
+                              uint64_t &CurHeaderWeight) {
+  uint64_t TrueWeight, FalseWeight;
+  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+    return;
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+  // The # of times the loop body executes is the sum of the exit block
+  // weight and the # of times the backedges are taken.
+  CurHeaderWeight = TrueWeight + FalseWeight;
+}
+
+/// Update the weights of the original latch block after peeling off all iterations.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit block.
+/// \param CurHeaderWeight The # of times the header is executed.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                               uint64_t ExitWeight, uint64_t CurHeaderWeight) {
+  // Adjust the branch weights on the loop exit.
+  if (!ExitWeight)
+    return;
+
+  // The backedge count is the difference of current header weight and
+  // current loop exit weight. If the current header weight is smaller than
+  // the current loop exit weight, we mark the loop backedge weight as 1.
+  uint64_t BackEdgeWeight = 0;
+  if (ExitWeight < CurHeaderWeight)
+    BackEdgeWeight = CurHeaderWeight - ExitWeight;
+  else
+    BackEdgeWeight = 1;
+  MDBuilder MDB(LatchBR->getContext());
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
                : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
 }
 
 /// Clones the body of the loop L, putting it between \p InsertTop and \p
 /// InsertBot.
 /// \param IterNumber The serial number of the iteration currently being
 /// peeled off.
-/// \param Exit The exit block of the original loop.
+/// \param ExitEdges The exit edges of the original loop.
 /// \param[out] NewBlocks A list of the blocks in the newly created clone
 /// \param[out] VMap The value map between the loop and the new clone.
 /// \param LoopBlocks A helper for DFS-traversal of the loop.
 /// \param LVMap A value-map that maps instructions from the original loop to
 /// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *Exit, - SmallVectorImpl<BasicBlock *> &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - ValueToValueMapTy &LVMap, DominatorTree *DT, - LoopInfo *LI) { +static void cloneLoopBlocks( + Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, + SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges, + SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, + LoopInfo *LI) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); @@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, // iteration (for every other iteration) BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator()); - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - LatchBR->setSuccessor(HeaderIdx, InsertBot); - LatchBR->setSuccessor(1 - HeaderIdx, Exit); + for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx) + if (LatchBR->getSuccessor(idx) == Header) { + LatchBR->setSuccessor(idx, InsertBot); + break; + } if (DT) DT->changeImmediateDominator(InsertBot, NewLatch); @@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, // we've just created. Note that this must happen *after* the incoming // values are adjusted, since the value going out of the latch may also be // a value coming into the header. - for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - Value *LatchVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - LatchVal = VMap[LatchVal]; - PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch])); - } + for (auto Edge : ExitEdges) + for (PHINode &PHI : Edge.second->phis()) { + Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + LatchVal = VMap[LatchVal]; + PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); + } // LastValueMap is updated with the values for the current loop // which are used the next time this function is called. @@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Exit = L->getUniqueExitBlock(); + SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges; + L->getExitEdges(ExitEdges); + + DenseMap<BasicBlock *, BasicBlock *> ExitIDom; + if (DT) { + assert(L->hasDedicatedExits() && "No dedicated exits?"); + for (auto Edge : ExitEdges) { + if (ExitIDom.count(Edge.second)) + continue; + BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + assert(L->contains(BB) && "IDom is not in a loop"); + ExitIDom[Edge.second] = BB; + } + } Function *F = Header->getParent(); @@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // newly created branches. BranchInst *LatchBR = cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator()); - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 
0 : 1);
-
-  uint64_t TrueWeight, FalseWeight;
   uint64_t ExitWeight = 0, CurHeaderWeight = 0;
-  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
-    ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
-    // The # of times the loop body executes is the sum of the exit block
-    // weight and the # of times the backedges are taken.
-    CurHeaderWeight = TrueWeight + FalseWeight;
-  }
+  initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
 
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
@@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     else
       CurHeaderWeight = 1;
 
-    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
-                    NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+                    LoopBlocks, VMap, LVMap, DT, LI);
 
     // Remap to use values from the current iteration instead of the
     // previous one.
@@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     // latter is the first cloned loop body, as original PreHeader dominates
     // the original loop body.
     if (Iter == 0)
-      DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+      for (auto Exit : ExitIDom)
+        DT->changeImmediateDominator(Exit.first,
+                                     cast<BasicBlock>(LVMap[Exit.second]));
 #ifdef EXPENSIVE_CHECKS
     assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 #endif
@@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     if (LatchInst && L->contains(LatchInst))
       NewVal = LVMap[LatchInst];
 
-    PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
   }
 
-  // Adjust the branch weights on the loop exit.
-  if (ExitWeight) {
-    // The backedge count is the difference of current header weight and
-    // current loop exit weight. If the current header weight is smaller than
-    // the current loop exit weight, we mark the loop backedge weight as 1.
-    uint64_t BackEdgeWeight = 0;
-    if (ExitWeight < CurHeaderWeight)
-      BackEdgeWeight = CurHeaderWeight - ExitWeight;
-    else
-      BackEdgeWeight = 1;
-    MDBuilder MDB(LatchBR->getContext());
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
-                  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
 
-  // If the loop is nested, we changed the parent loop, update SE.
-  if (Loop *ParentLoop = L->getParentLoop()) {
-    SE->forgetLoop(ParentLoop);
+  if (Loop *ParentLoop = L->getParentLoop())
+    L = ParentLoop;
 
-    // FIXME: Incrementally update loop-simplify
-    simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
-  } else {
-    // FIXME: Incrementally update loop-simplify
-    simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
-  }
+  // We modified the loop, update SE.
+  SE->forgetTopmostLoop(L);
+
+  // Finally, the DomTree must be correct.
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+  // FIXME: Incrementally update loop-simplify
+  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
 
   NumPeeled++;
 
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 00d2fd2fdbac..d22fdb4d52dc 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1,9 +1,8 @@
 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,11 +124,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
       // Update the existing PHI node operand with the value from the
       // new PHI node. How this is done depends on whether the existing
       // PHI node is in the original loop block, or the exit block.
-      if (L->contains(&PN)) {
-        PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
-      } else {
+      if (L->contains(&PN))
+        PN.setIncomingValueForBlock(NewPreHeader, NewPN);
+      else
         PN.addIncoming(NewPN, PrologExit);
-      }
     }
   }
 
@@ -265,7 +263,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
       // Update the existing PHI node operand with the value from the new PHI
       // node. Corresponding instruction in epilog loop should be PHI.
       PHINode *VPN = cast<PHINode>(VMap[&PN]);
-      VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+      VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
     }
   }
 
@@ -426,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
 
 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
 /// is populated with all the loop exit blocks other than the LatchExit block.
-static bool
-canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
-                             BasicBlock *LatchExit, bool PreserveLCSSA,
-                             bool UseEpilogRemainder) {
+static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
+                                         bool PreserveLCSSA,
+                                         bool UseEpilogRemainder) {
 
   // We currently have some correctness constraints in unrolling a multi-exit
   // loop. Check for these below.
 
   // We rely on LCSSA form being preserved when the exit blocks are transformed.
   if (!PreserveLCSSA)
     return false;
-  SmallVector<BasicBlock *, 4> Exits;
-  L->getUniqueExitBlocks(Exits);
-  for (auto *BB : Exits)
-    if (BB != LatchExit)
-      OtherExits.push_back(BB);
 
   // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
   // UnrollRuntimeMultiExit is true.
This will need updating the logic in @@ -471,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop( bool PreserveLCSSA, bool UseEpilogRemainder) { #if !defined(NDEBUG) - SmallVector<BasicBlock *, 8> OtherExitsDummyCheck; - assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, - PreserveLCSSA, UseEpilogRemainder) && + assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, + UseEpilogRemainder) && "Should be safe to unroll before checking profitability!"); #endif @@ -554,10 +545,10 @@ static bool canProfitablyUnrollMultiExitLoop( bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, - bool UnrollRemainder, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA, - Loop **ResultLoop) { + bool UnrollRemainder, bool ForgetAllSCEV, + LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA, Loop **ResultLoop) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -597,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; + L->getUniqueNonLatchExitBlocks(OtherExits); bool isMultiExitUnrollingEnabled = - canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); @@ -939,23 +931,24 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (OtherExits.size() > 0) { // Generate dedicated exit blocks for the original loop, to preserve // LoopSimplifyForm. - formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); // Generate dedicated exit blocks for the remainder loop if one exists, to // preserve LoopSimplifyForm. 
if (remainderLoop) - formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); + formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); } auto UnrollResult = LoopUnrollResult::Unmodified; if (remainderLoop && UnrollRemainder) { LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = - UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, - /*Force*/ false, /*AllowRuntime*/ false, - /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, - /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, - /*ORE*/ nullptr, PreserveLCSSA); + UnrollLoop(remainderLoop, + {/*Count*/ Count - 1, /*TripCount*/ Count - 1, + /*Force*/ false, /*AllowRuntime*/ false, + /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV}, + LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index 112e80d27e34..ec226e65f650 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1,9 +1,8 @@ //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,10 +14,12 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -27,7 +28,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -47,6 +47,7 @@ using namespace llvm::PatternMatch; static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { bool Changed = false; @@ -66,6 +67,9 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, if (isa<IndirectBrInst>(PredBB->getTerminator())) // We cannot rewrite exiting edges from an indirectbr. return false; + if (isa<CallBrInst>(PredBB->getTerminator())) + // We cannot rewrite exiting edges from a callbr. 
+        return false;
 
       InLoopPredecessors.push_back(PredBB);
     } else {
@@ -79,7 +83,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
       return false;
 
     auto *NewExitBB = SplitBlockPredecessors(
-        BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
+        BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
 
     if (!NewExitBB)
       LLVM_DEBUG(
@@ -531,10 +535,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   if (DT) {
     // Update the dominator tree by informing it about the new edge from the
-    // preheader to the exit.
-    DTU.insertEdge(Preheader, ExitBlock);
-    // Inform the dominator tree about the removed edge.
-    DTU.deleteEdge(Preheader, L->getHeader());
+    // preheader to the exit and the removed edge.
+    DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
+                      {DominatorTree::Delete, Preheader, L->getHeader()}});
   }
 
   // Use a map to unique and a vector to guarantee deterministic ordering.
@@ -581,10 +584,14 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
   // dbg.value truncates the range of any dbg.value before the loop where the
   // loop used to be. This is particularly important for constant values.
   DIBuilder DIB(*ExitBlock->getModule());
+  Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+  assert(InsertDbgValueBefore &&
+         "There should be a non-PHI instruction in exit block, else these "
+         "instructions will have no parent.");
   for (auto *DVI : DeadDebugInst)
-    DIB.insertDbgValueIntrinsic(
-        UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
-        DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+    DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+                                DVI->getVariable(), DVI->getExpression(),
+                                DVI->getDebugLoc(), InsertDbgValueBefore);
 
   // Remove the block from the reference counting scheme, so that we can
   // delete it freely later.
@@ -614,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
 }
 
 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
-  // Only support loops with a unique exiting block, and a latch.
-  if (!L->getExitingBlock())
-    return None;
+  // Support loops with an exiting latch; all other exits must end in
+  // deoptimize.
 
   // Get the branch weights for the loop's backedge.
-  BranchInst *LatchBR =
-      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
-  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return None;
+  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
    return None;
 
   assert((LatchBR->getSuccessor(0) == L->getHeader() ||
           LatchBR->getSuccessor(1) == L->getHeader()) &&
          "At least one edge out of the latch must go to the header");
 
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getUniqueNonLatchExitBlocks(ExitBlocks);
+  if (any_of(ExitBlocks, [](const BasicBlock *EB) {
+        return !EB->getTerminatingDeoptimizeCall();
+      }))
+    return None;
+
   // To estimate the number of times the loop body was executed, we want to
   // know the number of times the backedge was taken, vs. the number of times
   // we exited the loop.
@@ -668,16 +683,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
   return true;
 }
 
-/// Adds a 'fast' flag to floating point operations.
-static Value *addFastMathFlag(Value *V) { - if (isa<FPMathOperator>(V)) { - FastMathFlags Flags; - Flags.setFast(); - cast<Instruction>(V)->setFastMathFlags(Flags); - } - return V; -} - Value *llvm::createMinMaxOp(IRBuilder<> &Builder, RecurrenceDescriptor::MinMaxRecurrenceKind RK, Value *Left, Value *Right) { @@ -781,9 +786,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, ConstantVector::get(ShuffleMask), "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { - // Floating point operations had to be 'fast' to enable the reduction. - TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, - TmpVec, Shuf, "bin.rdx")); + // The builder propagates its fast-math-flags setting. + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); } else { assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && "Invalid min/max"); @@ -804,13 +809,9 @@ Value *llvm::createSimpleTargetReduction( ArrayRef<Value *> RedOps) { assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); - Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); std::function<Value *()> BuildFunc; using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; - // TODO: Support creating ordered reductions. - FastMathFlags FMFFast; - FMFFast.setFast(); switch (Opcode) { case Instruction::Add: @@ -830,15 +831,15 @@ Value *llvm::createSimpleTargetReduction( break; case Instruction::FAdd: BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); + auto Rdx = Builder.CreateFAddReduce( + Constant::getNullValue(Src->getType()->getVectorElementType()), Src); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { - auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); + Type *Ty = Src->getType()->getVectorElementType(); + auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); return Rdx; }; break; @@ -883,6 +884,12 @@ Value *llvm::createTargetReduction(IRBuilder<> &B, RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; + + // All ops in the reduction inherit fast-math-flags from the recurrence + // descriptor. + IRBuilder<>::FastMathFlagGuard FMFGuard(B); + B.setFastMathFlags(Desc.getFastMathFlags()); + switch (RecKind) { case RD::RK_FloatAdd: return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp index abbcd5f9e3b8..a9a480a4b7f9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -1,9 +1,8 @@ //===- LoopVersioning.cpp - Utility to version a loop ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -281,8 +280,9 @@ public: bool Changed = false; for (Loop *L : Worklist) { const LoopAccessInfo &LAI = LAA->getInfo(L); - if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() || - !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { + if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() && + (LAI.getNumRuntimePointerChecks() || + !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) { LoopVersioning LVer(LAI, L, LI, DT, SE); LVer.versionLoop(); LVer.annotateLoopWithNoAlias(); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index c852d538b0d1..fe67e191dc62 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -1,9 +1,8 @@ //===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -53,7 +52,8 @@ static bool runImpl(Function &F) { II->getOperandBundlesAsDefs(OpBundles); // Insert a normal call instruction... CallInst *NewCall = - CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II); + CallInst::Create(II->getFunctionType(), II->getCalledValue(), + CallArgs, OpBundles, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 661b4fa5bcb7..0cc085dc366c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -1,9 +1,8 @@ //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -73,7 +72,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
     // Loop Body
     Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-    Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+    Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
     Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
     LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -115,7 +114,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                            : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
       Value *SrcGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+      Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
 
       // Cast destination to operand type and store.
       PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
@@ -182,7 +181,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
 
   Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-  Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+  Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
   Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
   LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -235,7 +234,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
   Value *SrcGEP = ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
-  Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+  Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
   Value *DstGEP = ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
   ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -293,6 +292,8 @@ static void createMemMoveLoop(Instruction *InsertBefore,
   BasicBlock *OrigBB = InsertBefore->getParent();
   Function *F = OrigBB->getParent();
 
+  Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+
   // Create a comparison of src and dst, based on which we jump to either
   // the forward-copy part of the function (if src >= dst) or the backwards-copy
   // part (if src < dst).
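
[Illustrative sketch, not part of the patch: the direction choice described in the comment above, restated as a plain C++ memmove rather than IR-emitting code; the raw pointer comparison mirrors the flat-address-space assumption the lowering makes.]

    #include <cstddef>

    static void memmoveSketch(unsigned char *Dst, const unsigned char *Src,
                              size_t N) {
      if (Src < Dst) {
        // Backward copy: Dst may overlap the tail of Src, so start at the end.
        for (size_t I = N; I != 0; --I)
          Dst[I - 1] = Src[I - 1];
      } else {
        // Forward copy is safe when Src >= Dst.
        for (size_t I = 0; I != N; ++I)
          Dst[I] = Src[I];
      }
    }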
@@ -331,9 +332,10 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *IndexPtr = LoopBuilder.CreateSub( LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); Value *Element = LoopBuilder.CreateLoad( - LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); - LoopBuilder.CreateStore(Element, - LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); + EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr), + "element"); + LoopBuilder.CreateStore( + Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr)); LoopBuilder.CreateCondBr( LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), ExitBB, LoopBB); @@ -348,9 +350,10 @@ static void createMemMoveLoop(Instruction *InsertBefore, IRBuilder<> FwdLoopBuilder(FwdLoopBB); PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); Value *FwdElement = FwdLoopBuilder.CreateLoad( - FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); + EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi), + "element"); FwdLoopBuilder.CreateStore( - FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi)); Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index d019a44fc705..8256e3b5f5af 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -1,9 +1,8 @@ //===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,8 +16,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -28,6 +31,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -58,9 +62,8 @@ static bool IsInRanges(const IntRange &R, // Find the first range whose High field is >= R.High, // then check if the Low field is <= R.Low. If so, we // have a Range that covers R. 
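
[Illustrative sketch, not part of the patch: the containment test described above with concrete values, using std::lower_bound over ranges sorted by their High field; RangeSketch is a hypothetical stand-in for IntRange.]

    #include <algorithm>
    #include <vector>

    struct RangeSketch { long Low, High; };

    static bool isInRangesSketch(const RangeSketch &R,
                                 const std::vector<RangeSketch> &Sorted) {
      // First range whose High is >= R.High; it covers R iff its Low <= R.Low.
      auto I = std::lower_bound(Sorted.begin(), Sorted.end(), R,
                                [](const RangeSketch &A, const RangeSketch &B) {
                                  return A.High < B.High;
                                });
      return I != Sorted.end() && I->Low <= R.Low;
    }

    // With Sorted = {{0,3},{5,10}}: {6,9} is covered (lower_bound lands on
    // {5,10} and 5 <= 6), while {4,9} is not (5 > 4).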
- auto I = std::lower_bound( - Ranges.begin(), Ranges.end(), R, - [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + auto I = llvm::lower_bound( + Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; }); return I != Ranges.end() && I->Low <= R.Low; } @@ -78,6 +81,10 @@ namespace { bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LazyValueInfoWrapperPass>(); + } + struct CaseRange { ConstantInt* Low; ConstantInt* High; @@ -91,15 +98,18 @@ namespace { using CaseItr = std::vector<CaseRange>::iterator; private: - void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList); + void processSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock *> &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI); BasicBlock *switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, BasicBlock *Predecessor, BasicBlock *OrigBlock, BasicBlock *Default, const std::vector<IntRange> &UnreachableRanges); - BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, - BasicBlock *Default); + BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, + ConstantInt *LowerBound, ConstantInt *UpperBound, + BasicBlock *OrigBlock, BasicBlock *Default); unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); }; @@ -121,8 +131,12 @@ char LowerSwitch::ID = 0; // Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; -INITIALIZE_PASS(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) +INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) // createLowerSwitchPass - Interface to this file... FunctionPass *llvm::createLowerSwitchPass() { @@ -130,6 +144,17 @@ FunctionPass *llvm::createLowerSwitchPass() { } bool LowerSwitch::runOnFunction(Function &F) { + LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); + auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>(); + AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr; + // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not + // preserve it and it becomes stale (when available) pretty much immediately. + // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI + // and computeKnownBits to refine isValidAssumeForContext's results. Given + // that the latter can handle some of the simple cases w/o a DominatorTree, + // it's easier to refrain from using the tree than to keep it up to date. 
+ LVI->disableDT(); + bool Changed = false; SmallPtrSet<BasicBlock*, 8> DeleteList; @@ -143,11 +168,12 @@ bool LowerSwitch::runOnFunction(Function &F) { if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { Changed = true; - processSwitchInst(SI, DeleteList); + processSwitchInst(SI, DeleteList, AC, LVI); } } for (BasicBlock* BB: DeleteList) { + LVI->eraseBlock(BB); DeleteDeadBlock(BB); } @@ -160,10 +186,11 @@ static raw_ostream &operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; - for (LowerSwitch::CaseVector::const_iterator B = C.begin(), - E = C.end(); B != E; ) { - O << *B->Low << " -" << *B->High; - if (++B != E) O << ", "; + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end(); + B != E;) { + O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]"; + if (++B != E) + O << ", "; } return O << "]"; @@ -179,8 +206,9 @@ static raw_ostream &operator<<(raw_ostream &O, /// 2) Removed if subsequent incoming values now share the same case, i.e., /// multiple outcome edges are condensed into one. This is necessary to keep the /// number of phi values equal to the number of branches to SuccBB. -static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, - unsigned NumMergedCases) { +static void +fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, + const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) { for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI()->getIterator(); I != IE; ++I) { @@ -222,6 +250,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, BasicBlock *Predecessor, BasicBlock *OrigBlock, BasicBlock *Default, const std::vector<IntRange> &UnreachableRanges) { + assert(LowerBound && UpperBound && "Bounds must be initialized"); unsigned Size = End - Begin; if (Size == 1) { @@ -231,13 +260,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, // because the bounds already tell us so. if (Begin->Low == LowerBound && Begin->High == UpperBound) { unsigned NumMergedCases = 0; - if (LowerBound && UpperBound) - NumMergedCases = - UpperBound->getSExtValue() - LowerBound->getSExtValue(); + NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue(); fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); return Begin->BB; } - return newLeafBlock(*Begin, Val, OrigBlock, Default); + return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock, + Default); } unsigned Mid = Size / 2; @@ -247,8 +275,8 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n"); CaseRange &Pivot = *(Begin + Mid); - LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -" - << Pivot.High->getValue() << "\n"); + LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", " + << Pivot.High->getValue() << "]\n"); // NewLowerBound here should never be the integer minimal value. 
// This is because it is computed from a case range that is never @@ -270,14 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, NewUpperBound = LHS.back().High; } - LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { - dbgs() << LowerBound->getSExtValue(); - } else { dbgs() << "NONE"; } dbgs() << " - " - << NewUpperBound->getSExtValue() << "\n"; - dbgs() << "RHS Bounds ==> "; - dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { - dbgs() << UpperBound->getSExtValue() << "\n"; - } else { dbgs() << "NONE\n"; }); + LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", " + << NewUpperBound->getSExtValue() << "]\n" + << "RHS Bounds ==> [" << NewLowerBound->getSExtValue() + << ", " << UpperBound->getSExtValue() << "]\n"); // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. @@ -305,9 +329,11 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, /// switch's value == the case's value. If not, then it jumps to the default /// branch. At this point in the tree, the value can't be another valid case /// value, so the jump to the "default" branch is warranted. -BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, - BasicBlock* OrigBlock, - BasicBlock* Default) { +BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val, + ConstantInt *LowerBound, + ConstantInt *UpperBound, + BasicBlock *OrigBlock, + BasicBlock *Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); @@ -320,10 +346,14 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, Leaf.Low, "SwitchLeaf"); } else { // Make range comparison - if (Leaf.Low->isMinValue(true /*isSigned*/)) { + if (Leaf.Low == LowerBound) { // Val >= Min && Val <= Hi --> Val <= Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, "SwitchLeaf"); + } else if (Leaf.High == UpperBound) { + // Val <= Max && Val >= Lo --> Val >= Lo + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low, + "SwitchLeaf"); } else if (Leaf.Low->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, @@ -363,14 +393,20 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, return NewLeaf; } -/// Transform simple list of Cases into list of CaseRange's. +/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases. +/// \post \p Cases wouldn't contain references to \p SI's default BB. +/// \returns Number of \p SI's cases that do not reference \p SI's default BB. unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { - unsigned numCmps = 0; + unsigned NumSimpleCases = 0; // Start with "simple" cases - for (auto Case : SI->cases()) + for (auto Case : SI->cases()) { + if (Case.getCaseSuccessor() == SI->getDefaultDest()) + continue; Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(), Case.getCaseSuccessor())); + ++NumSimpleCases; + } llvm::sort(Cases, CaseCmp()); @@ -396,60 +432,88 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { Cases.erase(std::next(I), Cases.end()); } - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. 
- ++numCmps; - } - - return numCmps; + return NumSimpleCases; } /// Replace the specified switch instruction with a sequence of chained if-then /// insts in a balanced binary search. void LowerSwitch::processSwitchInst(SwitchInst *SI, - SmallPtrSetImpl<BasicBlock*> &DeleteList) { - BasicBlock *CurBlock = SI->getParent(); - BasicBlock *OrigBlock = CurBlock; - Function *F = CurBlock->getParent(); + SmallPtrSetImpl<BasicBlock *> &DeleteList, + AssumptionCache *AC, LazyValueInfo *LVI) { + BasicBlock *OrigBlock = SI->getParent(); + Function *F = OrigBlock->getParent(); Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); // Don't handle unreachable blocks. If there are successors with phis, this // would leave them behind with missing predecessors. - if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) || - CurBlock->getSinglePredecessor() == CurBlock) { - DeleteList.insert(CurBlock); + if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) || + OrigBlock->getSinglePredecessor() == OrigBlock) { + DeleteList.insert(OrigBlock); return; } + // Prepare cases vector. + CaseVector Cases; + const unsigned NumSimpleCases = Clusterify(Cases, SI); + LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total non-default cases: " << NumSimpleCases + << "\nCase clusters: " << Cases << "\n"); + // If there is only the default destination, just branch. - if (!SI->getNumCases()) { - BranchInst::Create(Default, CurBlock); + if (Cases.empty()) { + BranchInst::Create(Default, OrigBlock); + // Remove all the references from Default's PHIs to OrigBlock, but one. + fixPhis(Default, OrigBlock, OrigBlock); SI->eraseFromParent(); return; } - // Prepare cases vector. - CaseVector Cases; - unsigned numCmps = Clusterify(Cases, SI); - LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << "\n"); - LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n"); - (void)numCmps; - ConstantInt *LowerBound = nullptr; ConstantInt *UpperBound = nullptr; - std::vector<IntRange> UnreachableRanges; + bool DefaultIsUnreachableFromSwitch = false; if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { // Make the bounds tightly fitted around the case value range, because we // know that the value passed to the switch must be exactly one of the case // values. - assert(!Cases.empty()); LowerBound = Cases.front().Low; UpperBound = Cases.back().High; + DefaultIsUnreachableFromSwitch = true; + } else { + // Constraining the range of the value being switched over helps eliminating + // unreachable BBs and minimizing the number of `add` instructions + // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after + // LowerSwitch isn't as good, and also much more expensive in terms of + // compile time for the following reasons: + // 1. it processes many kinds of instructions, not just switches; + // 2. even if limited to icmp instructions only, it will have to process + // roughly C icmp's per switch, where C is the number of cases in the + // switch, while LowerSwitch only needs to call LVI once per switch. + const DataLayout &DL = F->getParent()->getDataLayout(); + KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI); + // TODO Shouldn't this create a signed range? 
+ ConstantRange KnownBitsRange = + ConstantRange::fromKnownBits(Known, /*IsSigned=*/false); + const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI); + ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange); + // We delegate removal of unreachable non-default cases to other passes. In + // the unlikely event that some of them survived, we just conservatively + // maintain the invariant that all the cases lie between the bounds. This + // may, however, still render the default case effectively unreachable. + APInt Low = Cases.front().Low->getValue(); + APInt High = Cases.back().High->getValue(); + APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low); + APInt Max = APIntOps::smax(ValRange.getSignedMax(), High); + + LowerBound = ConstantInt::get(SI->getContext(), Min); + UpperBound = ConstantInt::get(SI->getContext(), Max); + DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max); + } + + std::vector<IntRange> UnreachableRanges; + if (DefaultIsUnreachableFromSwitch) { DenseMap<BasicBlock *, unsigned> Popularity; unsigned MaxPop = 0; BasicBlock *PopSucc = nullptr; @@ -496,8 +560,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, #endif // As the default block in the switch is unreachable, update the PHI nodes - // (remove the entry to the default block) to reflect this. - Default->removePredecessor(OrigBlock); + // (remove all of the references to the default block) to reflect this. + const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases; + for (unsigned I = 0; I < NumDefaultEdges; ++I) + Default->removePredecessor(OrigBlock); // Use the most popular block as the new default, reducing the number of // cases. @@ -510,7 +576,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, // If there are no cases left, just branch. if (Cases.empty()) { - BranchInst::Create(Default, CurBlock); + BranchInst::Create(Default, OrigBlock); SI->eraseFromParent(); // As all the cases have been replaced with a single branch, only keep // one entry in the PHI nodes. @@ -518,12 +584,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, PopSucc->removePredecessor(OrigBlock); return; } - } - unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0; - for (const auto &Case : SI->cases()) - if (Case.getCaseSuccessor() == Default) - NrOfDefaults++; + // If the condition was a PHI node with the switch block as a predecessor + // removing predecessors may have caused the condition to be erased. + // Getting the condition value again here protects against that. + Val = SI->getCondition(); + } // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. @@ -537,14 +603,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, // If there are entries in any PHI nodes for the default edge, make sure // to update them as well. - fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults); + fixPhis(Default, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it. BasicBlock *OldDefault = SI->getDefaultDest(); - CurBlock->getInstList().erase(SI); + OrigBlock->getInstList().erase(SI); // If the Default block has no more predecessors just add it to DeleteList. 
if (pred_begin(OldDefault) == pred_end(OldDefault)) diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 23145e584751..cd2c81b6abc8 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -1,9 +1,8 @@ //===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 88d595ee02ab..c0b7edc547fd 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -1,9 +1,8 @@ //===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index ae5e72ea4d30..c84beceee191 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -1,9 +1,8 @@ //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,44 +27,24 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, // Get the current set of static global constructors and add the new ctor // to the list. SmallVector<Constant *, 16> CurrentCtors; - StructType *EltTy; + StructType *EltTy = StructType::get( + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy()); if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { - ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType()); - StructType *OldEltTy = cast<StructType>(ATy->getElementType()); - // Upgrade a 2-field global array type to the new 3-field format if needed. 
- if (Data && OldEltTy->getNumElements() < 3) - EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy()); - else - EltTy = OldEltTy; if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); - for (unsigned i = 0; i != n; ++i) { - auto Ctor = cast<Constant>(Init->getOperand(i)); - if (EltTy != OldEltTy) - Ctor = - ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0), - Ctor->getAggregateElement(1), - Constant::getNullValue(IRB.getInt8PtrTy())); - CurrentCtors.push_back(Ctor); - } + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); } GVCtor->eraseFromParent(); - } else { - // Use the new three-field struct if there isn't one already. - EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy()); } - // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. + // Build a 3 field global_ctor entry. We don't take a comdat key. Constant *CSVals[3]; CSVals[0] = IRB.getInt32(Priority); CSVals[1] = F; - // FIXME: Drop support for the two element form in LLVM 4.0. - if (EltTy->getNumElements() >= 3) - CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) - : Constant::getNullValue(IRB.getInt8PtrTy()); + CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) + : Constant::getNullValue(IRB.getInt8PtrTy()); Constant *RuntimeCtorInit = ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); @@ -127,36 +106,24 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { appendToUsedList(M, "llvm.compiler.used", Values); } -Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { - if (isa<Function>(FuncOrBitcast)) - return cast<Function>(FuncOrBitcast); - FuncOrBitcast->print(errs()); - errs() << '\n'; - std::string Err; - raw_string_ostream Stream(Err); - Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; - report_fatal_error(Err); -} - -Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, - ArrayRef<Type *> InitArgTypes) { +FunctionCallee +llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, + ArrayRef<Type *> InitArgTypes) { assert(!InitName.empty() && "Expected init function name"); - Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + return M.getOrInsertFunction( InitName, FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false), - AttributeList())); - F->setLinkage(Function::ExternalLinkage); - return F; + AttributeList()); } -std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( +std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, StringRef VersionCheckName) { assert(!InitName.empty() && "Expected init function name"); assert(InitArgs.size() == InitArgTypes.size() && "Sanitizer's init function expects different number of arguments"); - Function *InitFunction = + FunctionCallee InitFunction = declareSanitizerInitFunction(M, InitName, InitArgTypes); Function *Ctor = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), false), @@ -165,20 +132,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); IRB.CreateCall(InitFunction, InitArgs); if 
(!VersionCheckName.empty()) { - Function *VersionCheckFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), - AttributeList())); + FunctionCallee VersionCheckFunction = M.getOrInsertFunction( + VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), + AttributeList()); IRB.CreateCall(VersionCheckFunction, {}); } return std::make_pair(Ctor, InitFunction); } -std::pair<Function *, Function *> +std::pair<Function *, FunctionCallee> llvm::getOrCreateSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, - function_ref<void(Function *, Function *)> FunctionsCreatedCallback, + function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, StringRef VersionCheckName) { assert(!CtorName.empty() && "Expected ctor function name"); @@ -189,7 +155,8 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions( Ctor->getReturnType() == Type::getVoidTy(M.getContext())) return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)}; - Function *Ctor, *InitFunction; + Function *Ctor; + FunctionCallee InitFunction; std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName); FunctionsCreatedCallback(Ctor, InitFunction); @@ -208,9 +175,10 @@ Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) { } return F; } - Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction( - Name, AttributeList(), Type::getVoidTy(M.getContext()))); - F->setLinkage(Function::ExternalLinkage); + Function *F = + cast<Function>(M.getOrInsertFunction(Name, AttributeList(), + Type::getVoidTy(M.getContext())) + .getCallee()); appendToGlobalCtors(M, F, 0); diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index 34dc1cccdd5b..ac8991e9d475 100644 --- a/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -1,9 +1,8 @@ //===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 585ce6b4c118..bdf24d80bd17 100644 --- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -1,9 +1,8 @@ //===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------===// // @@ -474,7 +473,8 @@ void PredicateInfo::buildPredicateInfo() { } for (auto &Assume : AC.assumptions()) { if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume)) - processAssume(II, II->getParent(), OpsToRename); + if (DT.isReachableFromEntry(II->getParent())) + processAssume(II, II->getParent(), OpsToRename); } // Now rename all our operations. renameUses(OpsToRename); @@ -489,8 +489,10 @@ void PredicateInfo::buildPredicateInfo() { // tricky (FIXME). static Function *getCopyDeclaration(Module *M, Type *Ty) { std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty); - return cast<Function>(M->getOrInsertFunction( - Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty))); + return cast<Function>( + M->getOrInsertFunction(Name, + getType(M->getContext(), Intrinsic::ssa_copy, Ty)) + .getCallee()); } // Given the renaming stack, make all the operands currently on the stack real @@ -633,7 +635,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { // uses in the same instruction do not have a strict sort order // currently and will be considered equal. We could get rid of the // stable sort by creating one if we wanted. - std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare); + llvm::stable_sort(OrderedUses, Compare); SmallVector<ValueDFS, 8> RenameStack; // For each use, sorted into dfs order, push values and replaces uses with // top of stack, which will represent the reaching def. diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 91e4f4254b3e..d58e1ea574ef 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -1,9 +1,8 @@ //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -114,7 +113,6 @@ struct AllocaInfo { BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; - Value *AllocaPointerVal; TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares; void clear() { @@ -123,7 +121,6 @@ struct AllocaInfo { OnlyStore = nullptr; OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - AllocaPointerVal = nullptr; DbgDeclares.clear(); } @@ -141,14 +138,12 @@ struct AllocaInfo { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Remember the basic blocks which define new values for the alloca DefiningBlocks.push_back(SI->getParent()); - AllocaPointerVal = SI->getOperand(0); OnlyStore = SI; } else { LoadInst *LI = cast<LoadInst>(User); // Otherwise it must be a load instruction, keep track of variable // reads. UsingBlocks.push_back(LI->getParent()); - AllocaPointerVal = LI; } if (OnlyUsedInOneBlock) { @@ -254,11 +249,6 @@ struct PromoteMem2Reg { /// to. DenseMap<PHINode *, unsigned> PhiToAllocaMap; - /// If we are updating an AliasSetTracker, then for each alloca that is of - /// pointer type, we keep track of what to copyValue to the inserted PHI - /// nodes here. 
- std::vector<Value *> PointerAllocaValues; - /// For each alloca, we keep track of the dbg.declare intrinsic that /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. @@ -367,10 +357,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); - if (!isa<LoadInst>(UserInst)) { - assert(UserInst == OnlyStore && "Should only have load/stores"); + if (UserInst == OnlyStore) continue; - } LoadInst *LI = cast<LoadInst>(UserInst); // Okay, if we have a load from the alloca, we want to replace it with the @@ -390,8 +378,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, Info.UsingBlocks.push_back(StoreBB); continue; } - } else if (LI->getParent() != StoreBB && - !DT.dominates(StoreBB, LI->getParent())) { + } else if (!DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to // check their relationships. If the store doesn't dom the use, bail // out. @@ -429,14 +416,12 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); DII->eraseFromParent(); - LBI.deleteValue(DII); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); AI->eraseFromParent(); - LBI.deleteValue(AI); return true; } @@ -488,11 +473,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, unsigned LoadIdx = LBI.getInstructionIndex(LI); // Find the nearest store that has a lower index than this load. - StoresByIndexTy::iterator I = - std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::make_pair(LoadIdx, - static_cast<StoreInst *>(nullptr)), - less_first()); + StoresByIndexTy::iterator I = llvm::lower_bound( + StoresByIndex, + std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), + less_first()); if (I == StoresByIndex.begin()) { if (StoresByIndex.empty()) // If there are no stores, the load takes the undef value. @@ -535,13 +519,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, } AI->eraseFromParent(); - LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. - for (DbgVariableIntrinsic *DII : Info.DbgDeclares) { + for (DbgVariableIntrinsic *DII : Info.DbgDeclares) DII->eraseFromParent(); - LBI.deleteValue(DII); - } ++NumLocalPromoted; return true; @@ -620,8 +601,8 @@ void PromoteMem2Reg::run() { // dead phi nodes. // Unique the set of defining blocks for efficient lookup. - SmallPtrSet<BasicBlock *, 32> DefBlocks; - DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(), + Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. 
@@ -636,10 +617,9 @@ void PromoteMem2Reg::run() { IDF.setDefiningBlocks(DefBlocks); SmallVector<BasicBlock *, 32> PHIBlocks; IDF.calculate(PHIBlocks); - if (PHIBlocks.size() > 1) - llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.lookup(A) < BBNumbers.lookup(B); - }); + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }); unsigned CurrentVersion = 0; for (BasicBlock *BB : PHIBlocks) @@ -751,7 +731,7 @@ void PromoteMem2Reg::run() { // basic blocks. Start by sorting the incoming predecessors for efficient // access. auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.lookup(A) < BBNumbers.lookup(B); + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; }; llvm::sort(Preds, CompareBBNumbers); @@ -759,9 +739,8 @@ void PromoteMem2Reg::run() { // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. - SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( - Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i), - CompareBBNumbers); + SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound( + Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers); assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); @@ -825,14 +804,11 @@ void PromoteMem2Reg::ComputeLiveInBlocks( break; } - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getOperand(0) != AI) - continue; - + if (LoadInst *LI = dyn_cast<LoadInst>(I)) // Okay, we found a load before a store to the alloca. It is actually // live into this block. - break; - } + if (LI->getOperand(0) == AI) + break; } } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 9e5fb0e7172d..bffdd115d940 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -1,9 +1,8 @@ //===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -350,8 +349,7 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSA.Initialize(SomeVal->getType(), BaseName); } -void LoadAndStorePromoter:: -run(const SmallVectorImpl<Instruction *> &Insts) const { +void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. 
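A pattern recurring throughout this import (in ASanStackFrameLayout, LowerSwitch, PredicateInfo, and PromoteMemoryToRegister above) is the replacement of std::lower_bound/std::stable_sort over explicit begin()/end() pairs with LLVM's range-based llvm::lower_bound/llvm::stable_sort wrappers from llvm/ADT/STLExtras.h. A minimal standalone sketch of what such a wrapper does, using only the standard library (the sketch namespace and names are illustrative, not LLVM's actual code):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    namespace sketch {
    // Range-based shim: forwards to std::lower_bound over the whole range,
    // removing the chance of pairing mismatched begin()/end() iterators.
    template <typename Range, typename T, typename Compare>
    auto lower_bound(Range &&R, T &&Value, Compare C) {
      return std::lower_bound(std::begin(R), std::end(R),
                              std::forward<T>(Value), C);
    }
    } // namespace sketch

    int main() {
      // Mirrors the PromoteMem2Reg call site: search a sorted vector of
      // (instruction index, payload) pairs by index via less_first-style cmp.
      std::vector<std::pair<unsigned, int>> StoresByIndex{
          {1, 10}, {4, 40}, {9, 90}};
      auto I = sketch::lower_bound(
          StoresByIndex, std::make_pair(4u, 0),
          [](const auto &A, const auto &B) { return A.first < B.first; });
      assert(I != StoresByIndex.end() && I->first == 4);
    }

The wrapper changes no behavior, which is why the diff can apply it mechanically at each call site; it is purely a readability and safety cleanup.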
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index 397bac2940a4..917d5e0a1ef0 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -1,9 +1,8 @@ //===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp index 8c23957ac43e..a1313c77ed77 100644 --- a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp @@ -1,9 +1,8 @@ //===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -57,8 +56,8 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) { FunctionType *StatReportTy = FunctionType::get(B.getVoidTy(), Int8PtrTy, false); - Constant *StatReport = M->getOrInsertFunction( - "__sanitizer_stat_report", StatReportTy); + FunctionCallee StatReport = + M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy); auto InitAddr = ConstantExpr::getGetElementPtr( EmptyModuleStatsTy, ModuleStatsGV, @@ -98,8 +97,8 @@ void SanitizerStatReport::finish() { IRBuilder<> B(BB); FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false); - Constant *StatInit = M->getOrInsertFunction( - "__sanitizer_stat_init", StatInitTy); + FunctionCallee StatInit = + M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy); B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy)); B.CreateRetVoid(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 03b73954321d..11651d040dc0 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1,9 +1,8 @@ //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,8 +25,9 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -66,6 +66,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -292,9 +293,13 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, /// will be the same as those coming in from ExistPred, an existing predecessor /// of Succ. static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, - BasicBlock *ExistPred) { + BasicBlock *ExistPred, + MemorySSAUpdater *MSSAU = nullptr) { for (PHINode &PN : Succ->phis()) PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred); + if (MSSAU) + if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ)) + MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred); } /// Compute an abstract "cost" of speculating the given instruction, @@ -670,7 +675,8 @@ private: } // end anonymous namespace -static void EraseTerminatorAndDCECond(Instruction *TI) { +static void EraseTerminatorAndDCECond(Instruction *TI, + MemorySSAUpdater *MSSAU = nullptr) { Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cond = dyn_cast<Instruction>(SI->getCondition()); @@ -683,7 +689,7 @@ static void EraseTerminatorAndDCECond(Instruction *TI) { TI->eraseFromParent(); if (Cond) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU); } /// Return true if the specified terminator checks @@ -858,7 +864,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( return true; } - SwitchInst *SI = cast<SwitchInst>(TI); + SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant *, 16> DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) @@ -867,30 +873,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - // Collect branch weights into a vector. 
- SmallVector<uint32_t, 8> Weights; - MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); - bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); - if (HasWeight) - for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; - ++MD_i) { - ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i)); - Weights.push_back(CI->getValue().getZExtValue()); - } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; if (DeadCases.count(i->getCaseValue())) { - if (HasWeight) { - std::swap(Weights[i->getCaseIndex() + 1], Weights.back()); - Weights.pop_back(); - } i->getCaseSuccessor()->removePredecessor(TI->getParent()); - SI->removeCase(i); + SI.removeCase(i); } } - if (HasWeight && Weights.size() >= 2) - setBranchWeights(SI, Weights); - LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } @@ -1266,8 +1255,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, while (isa<DbgInfoIntrinsic>(I2)) I2 = &*BB2_Itr++; } + // FIXME: Can we define a safety predicate for CallBr? if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || - (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) || + isa<CallBrInst>(I1)) return false; BasicBlock *BIParent = BI->getParent(); @@ -1350,9 +1341,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, HoistTerminator: // It may not be possible to hoist an invoke. + // FIXME: Can we define a safety predicate for CallBr? if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) return Changed; + // TODO: callbr hoisting currently disabled pending further study. + if (isa<CallBrInst>(I1)) + return Changed; + for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); @@ -1432,9 +1428,10 @@ HoistTerminator: static bool canSinkInstructions( ArrayRef<Instruction *> Insts, DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) { - // Prune out obviously bad instructions to move. Any non-store instruction - // must have exactly one use, and we check later that use is by a single, - // common PHI instruction in the successor. + // Prune out obviously bad instructions to move. Each instruction must have + // exactly zero or one use, and we check later that use is by a single, common + // PHI instruction in the successor. + bool HasUse = !Insts.front()->user_empty(); for (auto *I : Insts) { // These instructions may change or break semantics if moved. if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || @@ -1444,13 +1441,14 @@ static bool canSinkInstructions( // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. - if (const auto *C = dyn_cast<CallInst>(I)) + if (const auto *C = dyn_cast<CallBase>(I)) if (C->isInlineAsm()) return false; - // Everything must have only one use too, apart from stores which - // have no uses. - if (!isa<StoreInst>(I) && !I->hasOneUse()) + // Each instruction must have zero or one use. + if (HasUse && !I->hasOneUse()) + return false; + if (!HasUse && !I->user_empty()) return false; } @@ -1459,11 +1457,11 @@ static bool canSinkInstructions( if (!I->isSameOperationAs(I0)) return false; - // All instructions in Insts are known to be the same opcode. 
If they aren't - // stores, check the only user of each is a PHI or in the same block as the - // instruction, because if a user is in the same block as an instruction - // we're contemplating sinking, it must already be determined to be sinkable. - if (!isa<StoreInst>(I0)) { + // All instructions in Insts are known to be the same opcode. If they have a + // use, check that the only user is a PHI or in the same block as the + // instruction, because if a user is in the same block as an instruction we're + // contemplating sinking, it must already be determined to be sinkable. + if (HasUse) { auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { @@ -1507,7 +1505,7 @@ static bool canSinkInstructions( // We can't create a PHI from this GEP. return false; // Don't create indirect calls! The called value is the final operand. - if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) { + if (isa<CallBase>(I0) && OI == OE - 1) { // FIXME: if the call was *already* indirect, we should do this. return false; } @@ -1541,7 +1539,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { // it is slightly over-aggressive - it gets confused by commutative instructions // so double-check it here. Instruction *I0 = Insts.front(); - if (!isa<StoreInst>(I0)) { + if (!I0->user_empty()) { auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { auto *U = cast<Instruction>(*I->user_begin()); @@ -1599,11 +1597,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { I0->andIRFlags(I); } - if (!isa<StoreInst>(I0)) { + if (!I0->user_empty()) { // canSinkLastInstruction checked that all instructions were used by // one and only one PHI node. Find that now, RAUW it to our common // instruction and nuke it. - assert(I0->hasOneUse()); auto *PN = cast<PHINode>(*I0->user_begin()); PN->replaceAllUsesWith(I0); PN->eraseFromParent(); @@ -2203,7 +2200,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); - BranchInst::Create(RealDest, EdgeBB); + BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); + CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -2539,7 +2537,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, + unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); @@ -2594,7 +2593,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // unconditionally. We denote all involved instructions except the condition // as "bonus instructions", and only allow this transformation when the // number of the bonus instructions we'll need to create when cloning into - // each predecessor does not exceed a certain threshold. + // each predecessor does not exceed a certain threshold. 
unsigned NumBonusInsts = 0; for (auto I = BB->begin(); Cond != &*I; ++I) { // Ignore dbg intrinsics. @@ -2611,7 +2610,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // and Cond. // Account for the cost of duplicating this instruction into each - // predecessor. + // predecessor. NumBonusInsts += PredCount; // Early exits once we reach the limit. if (NumBonusInsts > BonusInstThreshold) @@ -2750,7 +2749,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { (SuccFalseWeight + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); } - AddPredecessorToBlock(TrueDest, PredBlock, BB); + AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); PBI->setSuccessor(0, TrueDest); } if (PBI->getSuccessor(1) == BB) { @@ -2765,7 +2764,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // FalseWeight is FalseWeight for PBI * FalseWeight for BI. NewWeights.push_back(PredFalseWeight * SuccFalseWeight); } - AddPredecessorToBlock(FalseDest, PredBlock, BB); + AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); PBI->setSuccessor(1, FalseDest); } if (NewWeights.size() == 2) { @@ -2810,12 +2809,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { } } // Update PHI Node. - PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()), - MergedCond); + PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); } + + // PBI is changed to branch to TrueDest below. Remove itself from + // potential phis from all other successors. + if (MSSAU) + MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); + // Change PBI from Conditional to Unconditional. BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); - EraseTerminatorAndDCECond(PBI); + EraseTerminatorAndDCECond(PBI, MSSAU); PBI = New_PBI; } @@ -3430,7 +3434,7 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, KeepEdge2 = nullptr; else Succ->removePredecessor(OldTerm->getParent(), - /*DontDeleteUselessPHIs=*/true); + /*KeepOneInputPHIs=*/true); } IRBuilder<> Builder(OldTerm); @@ -3622,20 +3626,16 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); - SmallVector<uint64_t, 8> Weights; - bool HasWeights = HasBranchWeights(SI); - if (HasWeights) { - GetBranchWeights(SI, Weights); - if (Weights.size() == 1 + SI->getNumCases()) { - // Split weight for default case to case for "Cst". - Weights[0] = (Weights[0] + 1) >> 1; - Weights.push_back(Weights[0]); - - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - setBranchWeights(SI, MDWeights); + { + SwitchInstProfUpdateWrapper SIW(*SI); + auto W0 = SIW.getSuccessorWeight(0); + SwitchInstProfUpdateWrapper::CaseWeightOpt NewW; + if (W0) { + NewW = ((uint64_t(*W0) + 1) >> 1); + SIW.setSuccessorWeight(0, *NewW); } + SIW.addCase(Cst, NewBB, NewW); } - SI->addCase(Cst, NewBB); // NewBB branches to the phi block, add the uncond branch and the phi entry. 
Builder.SetInsertPoint(NewBB); @@ -4184,24 +4184,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { Changed = true; } } else { + Value* Cond = BI->getCondition(); if (BI->getSuccessor(0) == BB) { + Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); EraseTerminatorAndDCECond(BI); } else if (BI->getSuccessor(1) == BB) { + Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); EraseTerminatorAndDCECond(BI); Changed = true; } } } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { - for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { + SwitchInstProfUpdateWrapper SU(*SI); + for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { if (i->getCaseSuccessor() != BB) { ++i; continue; } - BB->removePredecessor(SI->getParent()); - i = SI->removeCase(i); - e = SI->case_end(); + BB->removePredecessor(SU->getParent()); + i = SU.removeCase(i); + e = SU->case_end(); Changed = true; } } else if (auto *II = dyn_cast<InvokeInst>(TI)) { @@ -4435,33 +4439,20 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, return true; } - SmallVector<uint64_t, 8> Weights; - bool HasWeight = HasBranchWeights(SI); - if (HasWeight) { - GetBranchWeights(SI, Weights); - HasWeight = (Weights.size() == 1 + SI->getNumCases()); - } + if (DeadCases.empty()) + return false; - // Remove dead cases from the switch. + SwitchInstProfUpdateWrapper SIW(*SI); for (ConstantInt *DeadCase : DeadCases) { SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase); assert(CaseI != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); - if (HasWeight) { - std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back()); - Weights.pop_back(); - } - // Prune unused values from PHI nodes. CaseI->getCaseSuccessor()->removePredecessor(SI->getParent()); - SI->removeCase(CaseI); - } - if (HasWeight && Weights.size() >= 2) { - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - setBranchWeights(SI, MDWeights); + SIW.removeCase(CaseI); } - return !DeadCases.empty(); + return true; } /// If BB would be eligible for simplification by @@ -5034,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable( ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, + Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true, GlobalVariable::PrivateLinkage, Initializer, "switch.table." + FuncName); Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); @@ -5091,7 +5082,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { Value *GEPIndices[] = {Builder.getInt32(0), Index}; Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, GEPIndices, "switch.gep"); - return Builder.CreateLoad(GEP, "switch.load"); + return Builder.CreateLoad( + cast<ArrayType>(Array->getValueType())->getElementType(), GEP, + "switch.load"); } } llvm_unreachable("Unknown lookup table kind!"); @@ -5425,7 +5418,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. 
SI->getDefaultDest()->removePredecessor(SI->getParent(), - /*DontDeleteUselessPHIs=*/true); + /*KeepOneInputPHIs=*/true); } bool ReturnedEarly = false; @@ -5533,25 +5526,23 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // Now we have signed numbers that have been shifted so that, given enough // precision, there are no negative values. Since the rest of the transform // is bitwise only, we switch now to an unsigned representation. - uint64_t GCD = 0; - for (auto &V : Values) - GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); - // This transform can be done speculatively because it is so cheap - it results - // in a single rotate operation being inserted. This can only happen if the - // factor extracted is a power of 2. - // FIXME: If the GCD is an odd number we can multiply by the multiplicative - // inverse of GCD and then perform this transform. + // This transform can be done speculatively because it is so cheap - it + // results in a single rotate operation being inserted. // FIXME: It's possible that optimizing a switch on powers of two might also // be beneficial - flag values are often powers of two and we could use a CLZ // as the key function. - if (GCD <= 1 || !isPowerOf2_64(GCD)) - // No common divisor found or too expensive to compute key function. - return false; - unsigned Shift = Log2_64(GCD); + // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than + // one element and LLVM disallows duplicate cases, Shift is guaranteed to be + // less than 64. + unsigned Shift = 64; for (auto &V : Values) - V = (int64_t)((uint64_t)V >> Shift); + Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + assert(Shift < 64); + if (Shift > 0) + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); if (!isSwitchDense(Values)) // Transform didn't create a dense switch. @@ -5796,7 +5787,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -5860,7 +5851,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 7faf291e73d9..cbb114f9a47a 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1,9 +1,8 @@ //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,6 +22,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -80,7 +80,8 @@ namespace { bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); bool replaceIVUserWithLoopInvariant(Instruction *UseInst); - bool eliminateOverflowIntrinsic(CallInst *CI); + bool eliminateOverflowIntrinsic(WithOverflowInst *WO); + bool eliminateSaturatingIntrinsic(SaturatingInst *SI); bool eliminateTrunc(TruncInst *TI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); @@ -401,61 +402,29 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, replaceSRemWithURem(Rem); } -bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { - auto *F = CI->getCalledFunction(); - if (!F) - return false; - - typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( - const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned); - typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( - const SCEV *, Type *, unsigned); - - OperationFunctionTy Operation; - ExtensionFunctionTy Extension; - - Instruction::BinaryOps RawOp; - - // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we - // have nuw. - bool NoSignedOverflow; - - switch (F->getIntrinsicID()) { +static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp, + bool Signed, const SCEV *LHS, const SCEV *RHS) { + const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags, unsigned); + switch (BinOp) { default: - return false; - - case Intrinsic::sadd_with_overflow: - Operation = &ScalarEvolution::getAddExpr; - Extension = &ScalarEvolution::getSignExtendExpr; - RawOp = Instruction::Add; - NoSignedOverflow = true; - break; - - case Intrinsic::uadd_with_overflow: + llvm_unreachable("Unsupported binary op"); + case Instruction::Add: Operation = &ScalarEvolution::getAddExpr; - Extension = &ScalarEvolution::getZeroExtendExpr; - RawOp = Instruction::Add; - NoSignedOverflow = false; break; - - case Intrinsic::ssub_with_overflow: + case Instruction::Sub: Operation = &ScalarEvolution::getMinusSCEV; - Extension = &ScalarEvolution::getSignExtendExpr; - RawOp = Instruction::Sub; - NoSignedOverflow = true; break; - - case Intrinsic::usub_with_overflow: - Operation = &ScalarEvolution::getMinusSCEV; - Extension = &ScalarEvolution::getZeroExtendExpr; - RawOp = Instruction::Sub; - NoSignedOverflow = false; + case Instruction::Mul: + Operation = &ScalarEvolution::getMulExpr; break; } - const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0)); - const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1)); + const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) = + Signed ? 
&ScalarEvolution::getSignExtendExpr + : &ScalarEvolution::getZeroExtendExpr; + // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) auto *NarrowTy = cast<IntegerType>(LHS->getType()); auto *WideTy = IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); @@ -466,27 +435,32 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { const SCEV *B = (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0), (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0); + return A == B; +} - if (A != B) +bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { + const SCEV *LHS = SE->getSCEV(WO->getLHS()); + const SCEV *RHS = SE->getSCEV(WO->getRHS()); + if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) return false; // Proved no overflow, nuke the overflow check and, if possible, the overflow // intrinsic as well. BinaryOperator *NewResult = BinaryOperator::Create( - RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI); + WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); - if (NoSignedOverflow) + if (WO->isSigned()) NewResult->setHasNoSignedWrap(true); else NewResult->setHasNoUnsignedWrap(true); SmallVector<ExtractValueInst *, 4> ToDelete; - for (auto *U : CI->users()) { + for (auto *U : WO->users()) { if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { if (EVI->getIndices()[0] == 1) - EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext())); + EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); else { assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); EVI->replaceAllUsesWith(NewResult); @@ -498,9 +472,28 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { for (auto *EVI : ToDelete) EVI->eraseFromParent(); - if (CI->use_empty()) - CI->eraseFromParent(); + if (WO->use_empty()) + WO->eraseFromParent(); + + return true; +} + +bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { + const SCEV *LHS = SE->getSCEV(SI->getLHS()); + const SCEV *RHS = SE->getSCEV(SI->getRHS()); + if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) + return false; + + BinaryOperator *BO = BinaryOperator::Create( + SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); + if (SI->isSigned()) + BO->setHasNoSignedWrap(); + else + BO->setHasNoUnsignedWrap(); + SI->replaceAllUsesWith(BO); + DeadInsts.emplace_back(SI); + Changed = true; return true; } @@ -548,20 +541,19 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { if (isa<Instruction>(U) && !DT->isReachableFromEntry(cast<Instruction>(U)->getParent())) continue; - if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { - if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) { - assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); - // If we cannot get rid of trunc, bail. - if (ICI->isSigned() && !DoesSExtCollapse) - return false; - if (ICI->isUnsigned() && !DoesZExtCollapse) - return false; - // For equality, either signed or unsigned works. - ICmpUsers.push_back(ICI); - } else - return false; - } else + ICmpInst *ICI = dyn_cast<ICmpInst>(U); + if (!ICI) return false; + assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); + if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) && + !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0)))) return false; + // If we cannot get rid of trunc, bail. 
+ if (ICI->isSigned() && !DoesSExtCollapse) + return false; + if (ICI->isUnsigned() && !DoesZExtCollapse) + return false; + // For equality, either signed or unsigned works. + ICmpUsers.push_back(ICI); } auto CanUseZExt = [&](ICmpInst *ICI) { @@ -584,7 +576,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { }; // Replace all comparisons against trunc with comparisons against IV. for (auto *ICI : ICmpUsers) { - auto *Op1 = ICI->getOperand(1); + bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); + auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); Instruction *Ext = nullptr; // For signed/unsigned predicate, replace the old comparison with comparison // of immediate IV against sext/zext of the invariant argument. If we can @@ -593,6 +586,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { // TODO: If we see a signed comparison which can be turned into unsigned, // we can do it here for canonicalization purposes. ICmpInst::Predicate Pred = ICI->getPredicate(); + if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred); if (CanUseZExt(ICI)) { assert(DoesZExtCollapse && "Unprofitable zext?"); Ext = new ZExtInst(Op1, IVTy, "zext", ICI); @@ -636,8 +630,12 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return eliminateSDiv(Bin); } - if (auto *CI = dyn_cast<CallInst>(UseInst)) - if (eliminateOverflowIntrinsic(CI)) + if (auto *WO = dyn_cast<WithOverflowInst>(UseInst)) + if (eliminateOverflowIntrinsic(WO)) + return true; + + if (auto *SI = dyn_cast<SaturatingInst>(UseInst)) + if (eliminateSaturatingIntrinsic(SI)) return true; if (auto *TI = dyn_cast<TruncInst>(UseInst)) @@ -730,59 +728,31 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, /// unsigned-overflow. Returns true if anything changed, false otherwise. bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { - // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. 
if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) return false; - const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, - SCEV::NoWrapFlags, unsigned); - switch (BO->getOpcode()) { - default: + if (BO->getOpcode() != Instruction::Add && + BO->getOpcode() != Instruction::Sub && + BO->getOpcode() != Instruction::Mul) return false; - case Instruction::Add: - GetExprForBO = &ScalarEvolution::getAddExpr; - break; - - case Instruction::Sub: - GetExprForBO = &ScalarEvolution::getMinusSCEV; - break; - - case Instruction::Mul: - GetExprForBO = &ScalarEvolution::getMulExpr; - break; - } - - unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth(); - Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2); const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - bool Changed = false; - if (!BO->hasNoUnsignedWrap()) { - const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); - const SCEV *OpAfterExtend = (SE->*GetExprForBO)( - SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap, 0u); - if (ExtendAfterOp == OpAfterExtend) { - BO->setHasNoUnsignedWrap(); - SE->forgetValue(BO); - Changed = true; - } + if (!BO->hasNoUnsignedWrap() && + willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) { + BO->setHasNoUnsignedWrap(); + SE->forgetValue(BO); + Changed = true; } - if (!BO->hasNoSignedWrap()) { - const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); - const SCEV *OpAfterExtend = (SE->*GetExprForBO)( - SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap, 0u); - if (ExtendAfterOp == OpAfterExtend) { - BO->setHasNoSignedWrap(); - SE->forgetValue(BO); - Changed = true; - } + if (!BO->hasNoSignedWrap() && + willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) { + BO->setHasNoSignedWrap(); + SE->forgetValue(BO); + Changed = true; } return Changed; diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 1bb26caa2af2..e0def81d5eee 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1,9 +1,8 @@ //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,8 +16,10 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; using namespace PatternMatch; @@ -105,6 +107,12 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { }); } +static bool callHasFP128Argument(const CallInst *CI) { + return any_of(CI->operands(), [](const Use &OI) { + return OI->getType()->isFP128Ty(); + }); +} + static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { if (Base < 2 || Base > 36) // handle special zero base @@ -334,11 +342,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { return ConstantInt::get(CI->getType(), Str1.compare(Str2)); if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x - return B.CreateNeg( - B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); @@ -398,11 +407,12 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { } if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x - return B.CreateNeg( - B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType())); + return B.CreateNeg(B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType())); if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"), + CI->getType()); uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); @@ -591,7 +601,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) - return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"), + CI->getType()); return nullptr; } @@ -735,7 +746,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { // strstr("abcd", "bc") -> gep((char*)"abcd", 1) Value *Result = castToCStr(CI->getArgOperand(0), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + Result = + B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } @@ -773,7 +785,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { // It would be really nice to reuse switch lowering here but we can't change // the CFG at this point. 
// - // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) + // != 0 // after bounds check. if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { unsigned char Max = @@ -828,27 +841,20 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); } -Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) - return nullptr; - - uint64_t Len = LenC->getZExtValue(); +static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, + uint64_t Len, IRBuilder<> &B, + const DataLayout &DL) { if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); + Value *LHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = + B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); return B.CreateSub(LHSV, RHSV, "chardiff"); } @@ -878,12 +884,12 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { if (!LHSV) { Type *LHSPtrTy = IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); + LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); } if (!RHSV) { Type *RHSPtrTy = IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); + RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); } return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } @@ -907,10 +913,48 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Ret = 1; return ConstantInt::get(CI->getType(), Ret); } + return nullptr; +} + +// Most simplifications for memcmp also apply to bcmp. +Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, + IRBuilder<> &B) { + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Handle constant lengths. + if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) + if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS, + LenC->getZExtValue(), B, DL)) + return Res; + + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { + if (Value *V = optimizeMemCmpBCmpCommon(CI, B)) + return V; + + // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 + // `bcmp` can be more efficient than memcmp because it only has to know that + // there is a difference, not where it is. 
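To see the rewrite described above at the source level, here is a minimal C++ sketch (illustrative only; bcmp is POSIX and not universally available, which is exactly why the code below guards on TLI->has(LibFunc_bcmp)):

    #include <cstring>
    #include <strings.h> // bcmp(); POSIX, availability is target-dependent

    // Before the rewrite: memcmp computes an ordering that the "== 0" test
    // throws away.
    bool eqMem(const void *p, const void *q, std::size_t n) {
      return std::memcmp(p, q, n) == 0;
    }

    // After the rewrite: bcmp only answers equal/unequal, so an
    // implementation may stop at the first mismatching byte.
    bool eqBcmp(const void *p, const void *q, std::size_t n) {
      return bcmp(p, q, n) == 0;
    }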
+ if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) { + Value *LHS = CI->getArgOperand(0); + Value *RHS = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); + return emitBCmp(LHS, RHS, Size, B, DL, TLI); + } return nullptr; } +Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) { + return optimizeMemCmpBCmpCommon(CI, B); +} + Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, @@ -1031,7 +1075,8 @@ static Value *valueHasFloatPrecision(Value *Val) { /// Shrink double -> float functions. static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, bool isBinary, bool isPrecise = false) { - if (!CI->getType()->isDoubleTy()) + Function *CalleeFn = CI->getCalledFunction(); + if (!CI->getType()->isDoubleTy() || !CalleeFn) return nullptr; // If not all the uses of the function are converted to float, then bail out. @@ -1051,15 +1096,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, if (!V[0] || (isBinary && !V[1])) return nullptr; + StringRef CalleeNm = CalleeFn->getName(); + AttributeList CalleeAt = CalleeFn->getAttributes(); + bool CalleeIn = CalleeFn->isIntrinsic(); + // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call, otherwise the result is an // infinite loop. For example, from MinGW-w64: // // float expf(float val) { return (float) exp((double) val); } - Function *CalleeFn = CI->getCalledFunction(); - StringRef CalleeNm = CalleeFn->getName(); - AttributeList CalleeAt = CalleeFn->getAttributes(); - if (CalleeFn && !CalleeFn->isIntrinsic()) { + if (!CalleeIn) { const Function *Fn = CI->getFunction(); StringRef FnName = Fn->getName(); if (FnName.back() == 'f' && @@ -1074,7 +1120,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, // g((double) float) -> (double) gf(float) Value *R; - if (CalleeFn->isIntrinsic()) { + if (CalleeIn) { Module *M = CI->getModule(); Intrinsic::ID IID = CalleeFn->getIntrinsicID(); Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); @@ -1132,10 +1178,10 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func, IRBuilder<> &B) { if (!isa<FPMathOperator>(Call)) return nullptr; - + IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(Call->getFastMathFlags()); - + // TODO: Can this be shared to also handle LLVM intrinsics? Value *X; switch (Func) { @@ -1189,7 +1235,8 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { } /// Use exp{,2}(x * y) for pow(exp{,2}(x), y); -/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x). +/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x); +/// exp2(log2(n) * x) for pow(n, x). Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); @@ -1276,12 +1323,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; - bool IsInteger = BaseF->isInteger(), - IsReciprocal = BaseR.isInteger(); + bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger(); const APFloat *NF = IsReciprocal ? 
&BaseR : BaseF; APSInt NI(64, false); if ((IsInteger || IsReciprocal) && - !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) && + NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK && NI > 1 && NI.isPowerOf2()) { double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0); Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul"); @@ -1301,6 +1348,28 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, B, Attrs); + // pow(n, x) -> exp2(log2(n) * x) + if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() && + Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) { + Value *Log = nullptr; + if (Ty->isFloatTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat())); + else if (Ty->isDoubleTy()) + Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble())); + + if (Log) { + Value *FMul = B.CreateFMul(Log, Expo, "mul"); + if (Pow->doesNotAccessMemory()) { + return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), + FMul, "exp2"); + } else { + if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l)) + return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l, B, Attrs); + } + } + } return nullptr; } @@ -1364,12 +1433,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { return Sqrt; } +static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M, + IRBuilder<> &B) { + Value *Args[] = {Base, Expo}; + Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType()); + return B.CreateCall(F, Args); +} + Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { - Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Value *Base = Pow->getArgOperand(0); + Value *Expo = Pow->getArgOperand(1); Function *Callee = Pow->getCalledFunction(); StringRef Name = Callee->getName(); Type *Ty = Pow->getType(); + Module *M = Pow->getModule(); Value *Shrunk = nullptr; + bool AllowApprox = Pow->hasApproxFunc(); bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. @@ -1382,8 +1461,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // Shrink pow() to powf() if the arguments are single precision, // unless the result is expected to be double precision. - if (UnsafeFPShrink && - Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) && + hasFloatVersion(Name)) Shrunk = optimizeBinaryDoubleFP(Pow, B, true); // Evaluate special cases related to the base. @@ -1403,7 +1482,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // pow(x, 0.0) -> 1.0 if (match(Expo, m_SpecificFP(0.0))) - return ConstantFP::get(Ty, 1.0); + return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x if (match(Expo, m_FPOne())) @@ -1418,7 +1497,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // pow(x, n) -> x * x * x * ... const APFloat *ExpoF; - if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF))) { // We limit to a max of 7 multiplications, thus the maximum exponent is 32. // If the exponent is an integer+0.5 we generate a call to sqrt and an // additional fmul. 
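The constant-exponent path guarded by AllowApprox above expands small exponents into an addition chain of fmuls, with one sqrt and one extra fmul absorbing a trailing 0.5. A standalone numeric sanity check of that expansion (plain C++, not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      // pow(x, 3.5) == x*x*x * sqrt(x): three multiplies plus one sqrt.
      double x = 2.0;
      double direct = std::pow(x, 3.5);
      double expanded = x * x * x * std::sqrt(x);
      // Both print 11.313708498984761 with a correctly rounded libm.
      std::printf("%.17g %.17g\n", direct, expanded);
      return 0;
    }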
@@ -1442,9 +1521,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { if (!Expo2.isInteger()) return nullptr; - Sqrt = - getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI); + Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), + Pow->doesNotAccessMemory(), M, B, TLI); } // We will memoize intermediate products of the Addition Chain. @@ -1467,6 +1545,29 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { return FMul; } + + APSInt IntExpo(32, /*isUnsigned=*/false); + // powf(x, n) -> powi(x, n) if n is a constant signed integer value + if (ExpoF->isInteger() && + ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == + APFloat::opOK) { + return createPowWithIntegerExponent( + Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B); + } + } + + // powf(x, itofp(y)) -> powi(x, y) + if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) { + Value *IntExpo = cast<Instruction>(Expo)->getOperand(0); + Value *NewExpo = nullptr; + unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); + if (isa<SIToFPInst>(Expo) && BitWidth == 32) + NewExpo = IntExpo; + else if (BitWidth < 32) + NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) + : B.CreateZExt(IntExpo, B.getInt32Ty()); + if (NewExpo) + return createPowWithIntegerExponent(Base, NewExpo, M, B); } return Shrunk; @@ -1504,9 +1605,8 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = CI->getModule(); - Value *NewCallee = - M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), - Op->getType(), B.getInt32Ty()); + FunctionCallee NewCallee = M->getOrInsertFunction( + TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty()); CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1518,40 +1618,30 @@ } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double // function, do that first. + Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) return Ret; + // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to + // the intrinsics for improved optimization (for example, vectorization). + // No-signed-zeros is implied by the definitions of fmax/fmin themselves. + // From the C standard draft WG14/N1256: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0.0, +0.0) would return +0; however, implementation in software + // might be impractical." IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - if (CI->isFast()) { - // If the call is 'fast', then anything we create here will also be 'fast'. - FMF.setFast(); - } else { - // At a minimum, no-nans-fp-math must be true. - if (!CI->hasNoNaNs()) - return nullptr; - // No-signed-zeros is implied by the definitions of fmax/fmin themselves: - // "Ideally, fmax would be sensitive to the sign of zero, for example - // fmax(-0.0, +0.0) would return +0; however, implementation in software - // might be impractical."
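The optimizeFMinFMax hunk here (its deletions continue just below) swaps the old compare-and-select lowering for the llvm.minnum/llvm.maxnum intrinsics, which already match libm's NaN semantics and therefore no longer need the no-NaNs precondition the removed code insisted on. A small standard-C++ demonstration of the difference:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
      double nan = std::numeric_limits<double>::quiet_NaN();
      // fmin, like llvm.minnum, returns the non-NaN operand:
      std::printf("%g\n", std::fmin(1.0, nan)); // prints 1
      // The old (x < y ? x : y) lowering returns NaN when y is NaN,
      // because the ordered compare is false; hence the old guard.
      double x = 1.0, y = nan;
      std::printf("%g\n", x < y ? x : y); // prints nan
      return 0;
    }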
- FMF.setNoSignedZeros(); - FMF.setNoNaNs(); - } + FastMathFlags FMF = CI->getFastMathFlags(); + FMF.setNoSignedZeros(); B.setFastMathFlags(FMF); - // We have a relaxed floating-point environment. We can ignore NaN-handling - // and transform to a compare and select. We do not have to consider errno or - // exceptions, because fmin/fmax do not have those. - Value *Op0 = CI->getArgOperand(0); - Value *Op1 = CI->getArgOperand(1); - Value *Cmp = Callee->getName().startswith("fmin") ? - B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); - return B.CreateSelect(Cmp, Op0, Op1); + Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum + : Intrinsic::maxnum; + Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); + return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); } Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { @@ -1654,13 +1744,13 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // replace it with the fabs of that factor. Module *M = Callee->getParent(); Type *ArgType = I->getType(); - Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); + Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType); Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs"); if (OtherOp) { // If we found a non-repeated factor, we still need to get its square // root. We then multiply that by the value that was simplified out // of the square root calculation. - Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); + Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType); Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt"); return B.CreateFMul(FabsCall, SqrtCall); } @@ -1728,8 +1818,8 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, } Module *M = OrigCallee->getParent(); - Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), - ResTy, ArgTy); + FunctionCallee Callee = + M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy); if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { // If the argument is an instruction, it must dominate all uses so put our @@ -1840,8 +1930,8 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); - Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::cttz, ArgType); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1854,8 +1944,8 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) { // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false)) Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); - Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), - Intrinsic::ctlz, ArgType); + Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), + Intrinsic::ctlz, ArgType); Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz"); V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()), V); @@ -2026,13 +2116,27 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { // arguments. 
if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *IPrintFFn = + FunctionCallee IPrintFFn = M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(IPrintFFn); B.Insert(New); return New; } + + // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point + // arguments. + if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_printf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2077,7 +2181,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { } if (FormatStr[1] == 's') { - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str, + // strlen(str)+1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; @@ -2105,13 +2210,27 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { // point arguments. if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *SIPrintFFn = + FunctionCallee SIPrintFFn = M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(SIPrintFFn); B.Insert(New); return New; } + + // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit + // floating point arguments. + if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallSPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallSPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2140,7 +2259,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { else if (N < FormatStr.size() + 1) return nullptr; - // sprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, + // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, // strlen(fmt)+1) B.CreateMemCpy( CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, @@ -2262,13 +2381,27 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { // floating point arguments. if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - Constant *FIPrintFFn = + FunctionCallee FIPrintFFn = M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(FIPrintFFn); B.Insert(New); return New; } + + // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no + // 128-bit floating point arguments. 
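The __small_printf/__small_sprintf/__small_fprintf rewrites in these hunks mirror the existing iprintf family: when the target's TargetLibraryInfo advertises the reduced variants (typically compact embedded C libraries), calls that provably pass no fp128 value can be downgraded, as the code continuing below does for fprintf. A hedged sketch of what the callHasFP128Argument() test lets through; whether long double lowers to fp128 is target-specific:

    #include <cstdio>

    void demo(std::FILE *f, int i, double d, long double ld) {
      std::fprintf(f, "%d\n", i);   // no FP argument: even fiprintf-eligible
      std::fprintf(f, "%f\n", d);   // double is fine for __small_fprintf
      std::fprintf(f, "%Lf\n", ld); // blocks the rewrite on targets where
                                    // long double is fp128
    }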
+ if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + auto SmallFPrintFFn = + M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf), + FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SmallFPrintFFn); + B.Insert(New); + return New; + } + return nullptr; } @@ -2288,7 +2421,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { // If this is writing one byte, turn it into fputc. // This optimisation is only valid if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); + Value *Char = B.CreateLoad(B.getInt8Ty(), + castToCStr(CI->getArgOperand(0), B), "char"); Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } @@ -2307,7 +2441,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Don't rewrite fputs to fwrite when optimising for size because fwrite // requires more arguments and thus extra MOVs are required. - if (CI->getFunction()->optForSize()) + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + if (OptForSize) return nullptr; // Check if it has any use @@ -2320,7 +2456,7 @@ return nullptr; } - // fputs(s,F) --> fwrite(s,1,strlen(s),F) + // fputs(s,F) --> fwrite(s,strlen(s),1,F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return nullptr; @@ -2367,18 +2503,14 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { - // Check for a constant string. - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + if (!CI->use_empty()) return nullptr; - if (Str.empty() && CI->use_empty()) { - // puts("") -> putchar('\n') - Value *Res = emitPutChar(B.getInt32('\n'), B, TLI); - if (CI->use_empty() || !Res) - return Res; - return B.CreateIntCast(Res, CI->getType(), true); - } + // Check for a constant string.
+ // puts("") -> putchar('\n') + StringRef Str; + if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) + return emitPutChar(B.getInt32('\n'), B, TLI); return nullptr; } @@ -2441,6 +2573,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrStr(CI, Builder); case LibFunc_memchr: return optimizeMemChr(CI, Builder); + case LibFunc_bcmp: + return optimizeBCmp(CI, Builder); case LibFunc_memcmp: return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: @@ -2686,9 +2820,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref<void(Instruction *, Value *)> Replacer, function_ref<void(Instruction *)> Eraser) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { @@ -2735,12 +2870,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) { // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// -bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, - unsigned ObjSizeOp, - unsigned SizeOp, - bool isString) { - if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp)) +bool +FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, + unsigned ObjSizeOp, + Optional<unsigned> SizeOp, + Optional<unsigned> StrOp, + Optional<unsigned> FlagOp) { + // If this function takes a flag argument, the implementation may use it to + // perform extra checks. Don't fold into the non-checking variant. + if (FlagOp) { + ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp)); + if (!Flag || !Flag->isZero()) + return false; + } + + if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp)) return true; + if (ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { if (ObjSizeCI->isMinusOne()) @@ -2748,23 +2894,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) return false; - if (isString) { - uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp)); + if (StrOp) { + uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); // If the length is 0 we don't know how long it is and so we can't // remove the check. 
if (Len == 0) return false; return ObjSizeCI->getZExtValue() >= Len; } - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp))) - return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + + if (SizeOp) { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp))) + return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue(); + } } return false; } Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); return CI->getArgOperand(0); @@ -2774,7 +2924,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); return CI->getArgOperand(0); @@ -2786,7 +2936,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { // TODO: Try foldMallocMemset() here. - if (isFortifiedCallFoldable(CI, 3, 2, false)) { + if (isFortifiedCallFoldable(CI, 3, 2)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); @@ -2797,8 +2947,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); const DataLayout &DL = CI->getModule()->getDataLayout(); Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), *ObjSize = CI->getArgOperand(2); @@ -2814,8 +2962,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
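Concretely, the fold below fires when the destination object size is known to cover the whole source string. A source-level illustration using the clang/GCC builtin spelling (illustrative, not from the patch):

    char buf[64];

    void copy() {
      // strlen("hello") + 1 == 6 and the object size is 64, so the runtime
      // check can never fail; the _chk call folds to a plain strcpy.
      __builtin___strcpy_chk(buf, "hello", __builtin_object_size(buf, 0));
    }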
- if (isFortifiedCallFoldable(CI, 2, 1, true)) - return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); + if (isFortifiedCallFoldable(CI, 2, None, 1)) { + if (Func == LibFunc_strcpy_chk) + return emitStrCpy(Dst, Src, B, TLI); + else + return emitStpCpy(Dst, Src, B, TLI); + } if (OnlyLowerUnknownSize) return nullptr; @@ -2838,13 +2990,99 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TLI, Name.substr(2, 7)); - return Ret; + if (isFortifiedCallFoldable(CI, 3, 2)) { + if (Func == LibFunc_strncpy_chk) + return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + else + return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 4, 3)) + return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { + SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end()); + return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), VariadicArgs, B, TLI); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { + SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end()); + return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs, + B, TLI); + } + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2)) + return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3)) + return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) + return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(4), CI->getArgOperand(5), B, TLI); + + return nullptr; +} + +Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, + IRBuilder<> &B) { + if (isFortifiedCallFoldable(CI, 2, None, None, 1)) + return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), + 
CI->getArgOperand(4), B, TLI); + return nullptr; } @@ -2892,6 +3130,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { case LibFunc_stpncpy_chk: case LibFunc_strncpy_chk: return optimizeStrpNCpyChk(CI, Builder, Func); + case LibFunc_memccpy_chk: + return optimizeMemCCpyChk(CI, Builder); + case LibFunc_snprintf_chk: + return optimizeSNPrintfChk(CI, Builder); + case LibFunc_sprintf_chk: + return optimizeSPrintfChk(CI, Builder); + case LibFunc_strcat_chk: + return optimizeStrCatChk(CI, Builder); + case LibFunc_strlcat_chk: + return optimizeStrLCat(CI, Builder); + case LibFunc_strncat_chk: + return optimizeStrNCatChk(CI, Builder); + case LibFunc_strlcpy_chk: + return optimizeStrLCpyChk(CI, Builder); + case LibFunc_vsnprintf_chk: + return optimizeVSNPrintfChk(CI, Builder); + case LibFunc_vsprintf_chk: + return optimizeVSPrintfChk(CI, Builder); default: break; } diff --git a/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp new file mode 100644 index 000000000000..1519751197d2 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -0,0 +1,37 @@ +//===-- SizeOpts.cpp - code size optimization related code ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some shared code size optimization related code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" +using namespace llvm; + +static cl::opt<bool> ProfileGuidedSizeOpt( + "pgso", cl::Hidden, cl::init(true), + cl::desc("Enable the profile guided size optimization. ")); + +bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + assert(F); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI); +} + +bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + assert(BB); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp index 5db4d2e4df9d..e2c387cb8983 100644 --- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp @@ -1,9 +1,8 @@ //===- SplitModule.cpp - Split a module into partitions -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index ac0b519f4a77..50844cf9d1c5 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -1,9 +1,8 @@ //===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 8956a089a99c..97a4533fabe5 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -1,9 +1,8 @@ //===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index fd0da79487f1..456724779b43 100644 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -1,9 +1,8 @@ //===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index d49b26472548..7f7bdf8a3d6d 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -1,9 +1,8 @@ //===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index 95416de07439..5272ab6e95d5 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -1,9 +1,8 @@ //===-- Utils.cpp - TransformUtils Infrastructure -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -55,3 +54,6 @@ void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPromoteMemoryToRegisterPass()); } +void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAddDiscriminatorsPass()); +} diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp index 948d9bd5baad..a77bf50fe10b 100644 --- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -14,13 +14,17 @@ namespace VNCoercion { /// Return true if coerceAvailableValueToLoadType will succeed. bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL) { + Type *StoredTy = StoredVal->getType(); + if (StoredTy == LoadTy) + return true; + // If the loaded or stored value is a first-class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (LoadTy->isStructTy() || LoadTy->isArrayTy() || - StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() || + StoredTy->isArrayTy()) return false; - uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType()); + uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy); // The store size must be byte-aligned to support future type casts. if (llvm::alignTo(StoreSize, 8) != StoreSize) return false; @@ -31,10 +35,16 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, return false; // Don't coerce non-integral pointers to integers or vice versa. - if (DL.isNonIntegralPointerType(StoredVal->getType()) != - DL.isNonIntegralPointerType(LoadTy)) + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != + DL.isNonIntegralPointerType(LoadTy->getScalarType())) { + // As a special case, allow coercion of memset used to initialize + // an array w/null. Despite non-integral pointers not generally having a + // specific bit pattern, we do assume null is zero. + if (auto *CI = dyn_cast<Constant>(StoredVal)) + return CI->isNullValue(); return false; - + } + return true; } @@ -207,11 +217,22 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, /// memdep query of a load that ends up being a clobbering store. int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const DataLayout &DL) { + auto *StoredVal = DepSI->getValueOperand(); + // Cannot handle reading from store of first-class aggregate yet.
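The recurring guard in these VNCoercion hunks is the non-integral pointer rule. A hedged sketch of the decision they all implement (the helper name is invented for illustration):

    #include "llvm/IR/Constants.h"

    // Coercion across the integral/non-integral pointer boundary is allowed
    // only for known-all-zero constants: null is the one bit pattern that
    // non-integral pointers are still assumed to share with integers.
    static bool mayCoerceAcrossNonIntegral(bool SrcNonIntegral,
                                           bool DstNonIntegral,
                                           const llvm::Constant *StoredC) {
      if (SrcNonIntegral == DstNonIntegral)
        return true;                            // same side of the boundary
      return StoredC && StoredC->isNullValue(); // crossing it: zeros only
    }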
- if (DepSI->getValueOperand()->getType()->isStructTy() || - DepSI->getValueOperand()->getType()->isArrayTy()) + if (StoredVal->getType()->isStructTy() || + StoredVal->getType()->isArrayTy()) return -1; + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) != + DL.isNonIntegralPointerType(LoadTy->getScalarType())) { + // Allow casts of zero values to null as a special case + auto *CI = dyn_cast<Constant>(StoredVal); + if (!CI || !CI->isNullValue()) + return -1; + } + Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize = DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); @@ -228,6 +249,11 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) != + DL.isNonIntegralPointerType(LoadTy->getScalarType())) + return -1; + Value *DepPtr = DepLI->getPointerOperand(); uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); @@ -264,9 +290,15 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, // If this is memset, we just need to see if the offset is valid in the size // of the memset. - if (MI->getIntrinsicID() == Intrinsic::memset) + if (MI->getIntrinsicID() == Intrinsic::memset) { + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) { + auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue()); + if (!CI || !CI->isZero()) + return -1; + } return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), MemSizeInBits, DL); + } // If we have a memcpy/memmove, the only case we can handle is if this is a // copy from constant memory. In that case, we can read directly from the @@ -278,7 +310,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, return -1; GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); - if (!GV || !GV->isConstant()) + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return -1; // See if the access is within the bounds of the transfer. @@ -287,6 +319,12 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (Offset == -1) return Offset; + // Don't coerce non-integral pointers to integers or vice versa, and the + // memtransfer is implicitly a raw byte copy + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) + // TODO: Can allow nullptrs from constant zeros + return -1; + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. @@ -386,12 +424,12 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, // memdep queries will find the new load. We can't easily remove the old // load completely because it is already in the value numbering table.
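For the load widening below, the surrounding function (not shown in this hunk) rounds Offset + LoadSize up to a power of two to choose the new width. Worked numbers, with illustrative values:

    #include <cassert>

    int main() {
      unsigned SrcValBytes = 4;          // SrcVal was a 4-byte (i32) load
      unsigned Offset = 4, LoadSize = 2; // the query needs bytes [4, 6)
      unsigned NewLoadSize = 8;          // 4 + 2 = 6, rounded up to 8
      assert(NewLoadSize >= Offset + LoadSize && NewLoadSize >= SrcValBytes);
      // DestTy below then becomes IntegerType::get(Ctx, NewLoadSize * 8),
      // i.e. an i64 re-load replacing the original i32 one.
      return 0;
    }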
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); - DestPTy = - PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace()); + Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); + Type *DestPTy = + PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); - LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal); NewLoad->takeName(SrcVal); NewLoad->setAlignment(SrcVal->getAlignment()); diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 55fff3f3872a..fbc3407c301f 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -1,9 +1,8 @@ //===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -914,6 +913,21 @@ void Mapper::remapInstruction(Instruction *I) { Tys.push_back(TypeMapper->remapType(Ty)); CS.mutateFunctionType(FunctionType::get( TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg())); + + LLVMContext &C = CS->getContext(); + AttributeList Attrs = CS.getAttributes(); + for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { + if (Attrs.hasAttribute(i, Attribute::ByVal)) { + Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType(); + if (!Ty) + continue; + + Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal); + Attrs = Attrs.addAttribute( + C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty))); + } + } + CS.setAttributes(Attrs); return; } if (auto *AI = dyn_cast<AllocaInst>(I)) |
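The final ValueMapper hunk makes attribute types follow the same remapping as operand types: a byval(T) parameter cloned into another module must carry that module's T. A standalone restatement of the new loop (the free-function packaging is invented; the API calls are the ones the hunk itself uses):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    using namespace llvm;

    static AttributeList remapByValTypes(LLVMContext &C, AttributeList Attrs,
                                         ValueMapTypeRemapper &TM) {
      for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
        if (Attrs.hasAttribute(i, Attribute::ByVal)) {
          // Only typed byval attributes carry a type payload to rewrite.
          Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
          if (!Ty)
            continue;
          Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
          Attrs = Attrs.addAttribute(
              C, i, Attribute::getWithByValType(C, TM.remapType(Ty)));
        }
      }
      return Attrs;
    }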