Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
53 files changed, 4356 insertions, 1309 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index 0f0668f24db5..e3ef42362223 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -69,7 +69,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include <utility> using namespace llvm; @@ -114,7 +114,7 @@ static bool shouldHaveDiscriminator(const Instruction *I) { return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I); } -/// \brief Assign DWARF discriminators. +/// Assign DWARF discriminators. /// /// To assign discriminators, we examine the boundaries of every /// basic block and its successors. Suppose there is a basic block B1 @@ -210,9 +210,9 @@ static bool addDiscriminators(Function &F) { // it in 1 byte ULEB128 representation. unsigned Discriminator = R.second ? ++LDM[L] : LDM[L]; I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator)); - DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" - << DIL->getColumn() << ":" << Discriminator << " " << I - << "\n"); + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " << I + << "\n"); Changed = true; } } diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 8f59913e14bb..516a785dce1e 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -36,7 +37,6 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> #include <string> @@ -45,16 +45,22 @@ using namespace llvm; -void llvm::DeleteDeadBlock(BasicBlock *BB) { +void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) { assert((pred_begin(BB) == pred_end(BB) || // Can delete self loop. BB->getSinglePredecessor() == BB) && "Block is not dead!"); TerminatorInst *BBTerm = BB->getTerminator(); + std::vector<DominatorTree::UpdateType> Updates; // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (BasicBlock *Succ : BBTerm->successors()) + if (DDT) + Updates.reserve(BBTerm->getNumSuccessors()); + for (BasicBlock *Succ : BBTerm->successors()) { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } // Zap all the instructions in the block. while (!BB->empty()) { @@ -69,8 +75,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { BB->getInstList().pop_back(); } - // Zap the block! - BB->eraseFromParent(); + if (DDT) { + DDT->applyUpdates(Updates); + DDT->deleteBB(BB); // Deferred deletion of BB. + } else { + BB->eraseFromParent(); // Zap the block! 
+ } } void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, @@ -107,9 +117,12 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, LoopInfo *LI, - MemoryDependenceResults *MemDep) { - // Don't merge away blocks who have their address taken. - if (BB->hasAddressTaken()) return false; + MemoryDependenceResults *MemDep, + DeferredDominance *DDT) { + assert(!(DT && DDT) && "Cannot call with both DT and DDT."); + + if (BB->hasAddressTaken()) + return false; // Can't merge if there are multiple predecessors, or no predecessors. BasicBlock *PredBB = BB->getUniquePredecessor(); @@ -121,16 +134,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, if (PredBB->getTerminator()->isExceptional()) return false; - succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); - BasicBlock *OnlySucc = BB; - for (; SI != SE; ++SI) - if (*SI != OnlySucc) { - OnlySucc = nullptr; // There are multiple distinct successors! - break; - } - - // Can't merge if there are multiple successors. - if (!OnlySucc) return false; + // Can't merge if there are multiple distinct successors. + if (PredBB->getUniqueSuccessor() != BB) + return false; // Can't merge if there is PHI loop. for (PHINode &PN : BB->phis()) @@ -139,14 +145,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, return false; // Begin by getting rid of unneeded PHIs. - SmallVector<Value *, 4> IncomingValues; + SmallVector<AssertingVH<Value>, 4> IncomingValues; if (isa<PHINode>(BB->front())) { for (PHINode &PN : BB->phis()) - if (PN.getIncomingValue(0) != &PN) + if (!isa<PHINode>(PN.getIncomingValue(0)) || + cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB) IncomingValues.push_back(PN.getIncomingValue(0)); FoldSingleEntryPHINodes(BB, MemDep); } + // Deferred DT update: Collect all the edges that exit BB. These + // dominator edges will be redirected from Pred. + std::vector<DominatorTree::UpdateType> Updates; + if (DDT) { + Updates.reserve(1 + (2 * succ_size(BB))); + Updates.push_back({DominatorTree::Delete, PredBB, BB}); + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, BB, *I}); + Updates.push_back({DominatorTree::Insert, PredBB, *I}); + } + } + // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -158,8 +177,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); // Eliminate duplicate dbg.values describing the entry PHI node post-splice. - for (auto *Incoming : IncomingValues) { - if (isa<Instruction>(Incoming)) { + for (auto Incoming : IncomingValues) { + if (isa<Instruction>(*Incoming)) { SmallVector<DbgValueInst *, 2> DbgValues; SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2> DbgValueSet; @@ -193,7 +212,12 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, if (MemDep) MemDep->invalidateCachedPredecessors(); - BB->eraseFromParent(); + if (DDT) { + DDT->deleteBB(BB); // Deferred deletion of BB. + DDT->applyUpdates(Updates); + } else { + BB->eraseFromParent(); // Nuke BB. + } return true; } @@ -309,13 +333,21 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, bool &HasLoopExit) { // Update dominator tree if available. 
- if (DT) - DT->splitBlock(NewBB); + if (DT) { + if (OldBB == DT->getRootNode()->getBlock()) { + assert(NewBB == &NewBB->getParent()->getEntryBlock()); + DT->setNewRoot(NewBB); + } else { + // Split block expects NewBB to have a non-empty set of predecessors. + DT->splitBlock(NewBB); + } + } // The rest of the logic is only relevant for updating the loop structures. if (!LI) return; + assert(DT && "DT should be available to update LoopInfo!"); Loop *L = LI->getLoopFor(OldBB); // If we need to preserve loop analyses, collect some information about how @@ -493,7 +525,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); - return NewBB; } // Update DominatorTree, LoopInfo, and LCCSA analysis information. @@ -501,8 +532,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA, HasLoopExit); - // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); + if (!Preds.empty()) { + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); + } + return NewBB; } diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 464d1a34f518..3e30c27a9f33 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -28,7 +28,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index b60dfb4f3541..5f5c4150d3bb 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -105,12 +105,23 @@ static bool setRetNonNull(Function &F) { return true; } +static bool setNonLazyBind(Function &F) { + if (F.hasFnAttribute(Attribute::NonLazyBind)) + return false; + F.addFnAttr(Attribute::NonLazyBind); + return true; +} + bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { LibFunc TheLibFunc; if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) return false; bool Changed = false; + + if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) + Changed |= setNonLazyBind(F); + switch (TheLibFunc) { case LibFunc_strlen: case LibFunc_wcslen: @@ -375,6 +386,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_fseek: case LibFunc_ftell: case LibFunc_fgetc: + case LibFunc_fgetc_unlocked: case LibFunc_fseeko: case LibFunc_ftello: case LibFunc_fileno: @@ -393,6 +405,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F); return Changed; case LibFunc_fputc: + case LibFunc_fputc_unlocked: case LibFunc_fstat: case LibFunc_frexp: case LibFunc_frexpf: @@ -402,21 +415,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_fgets: + case 
LibFunc_fgets_unlocked: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_fread: + case LibFunc_fread_unlocked: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_fwrite: + case LibFunc_fwrite_unlocked: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? return Changed; case LibFunc_fputs: + case LibFunc_fputs_unlocked: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); @@ -447,6 +464,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_gets: case LibFunc_getchar: + case LibFunc_getchar_unlocked: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_getitimer: @@ -485,6 +503,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putc: + case LibFunc_putc_unlocked: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; @@ -505,6 +524,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: + case LibFunc_putchar_unlocked: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: @@ -687,9 +707,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setRetNonNull(F); Changed |= setRetDoesNotAlias(F); return Changed; - //TODO: add LibFunc entries for: - //case LibFunc_memset_pattern4: - //case LibFunc_memset_pattern8: + // TODO: add LibFunc entries for: + // case LibFunc_memset_pattern4: + // case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); @@ -709,6 +729,19 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { } } +bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + //- Emit LibCalls ------------------------------------------------------------// Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { @@ -973,6 +1006,24 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, return CI; } +Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fputc_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Constant *F = M->getOrInsertFunction("fputc_unlocked", B.getInt32Ty(), + B.getInt32Ty(), File->getType()); + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction("fputc_unlocked"), *TLI); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari"); + CallInst *CI = B.CreateCall(F, {Char, File}, "fputc_unlocked"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc_fputs)) @@ -991,6 +1042,24 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, return CI; } +Value 
*llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fputs_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked); + Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(), + B.getInt8PtrTy(), File->getType()); + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction(FPutsUnlockedName), *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs_unlocked"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc_fwrite)) @@ -1013,3 +1082,119 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, CI->setCallingConv(Fn->getCallingConv()); return CI; } + +Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_malloc)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *Malloc = M->getOrInsertFunction("malloc", B.getInt8PtrTy(), + DL.getIntPtrType(Context)); + inferLibFuncAttributes(*M->getFunction("malloc"), *TLI); + CallInst *CI = B.CreateCall(Malloc, Num, "malloc"); + + if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, + IRBuilder<> &B, const TargetLibraryInfo &TLI) { + if (!TLI.has(LibFunc_calloc)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + const DataLayout &DL = M->getDataLayout(); + IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); + Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), + PtrType, PtrType); + inferLibFuncAttributes(*M->getFunction("calloc"), TLI); + CallInst *CI = B.CreateCall(Calloc, {Num, Size}, "calloc"); + + if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, + IRBuilder<> &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fwrite_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked); + Constant *F = M->getOrInsertFunction( + FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction(FWriteUnlockedName), *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fgetc_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Constant *F = + M->getOrInsertFunction("fgetc_unlocked", B.getInt32Ty(), File->getType()); + if 
(File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction("fgetc_unlocked"), *TLI); + CallInst *CI = B.CreateCall(F, File, "fgetc_unlocked"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File, + IRBuilder<> &B, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fgets_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + Constant *F = + M->getOrInsertFunction("fgets_unlocked", B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt32Ty(), File->getType()); + inferLibFuncAttributes(*M->getFunction("fgets_unlocked"), *TLI); + CallInst *CI = + B.CreateCall(F, {castToCStr(Str, B), Size, File}, "fgets_unlocked"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} + +Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, + IRBuilder<> &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc_fread_unlocked)) + return nullptr; + + Module *M = B.GetInsertBlock()->getModule(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked); + Constant *F = M->getOrInsertFunction( + FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + + if (File->getType()->isPointerTy()) + inferLibFuncAttributes(*M->getFunction(FReadUnlockedName), *TLI); + CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); + return CI; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index f711b192f604..05512a6dff3e 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -34,7 +35,6 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> @@ -173,7 +173,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { return isDivisionOp() ? Value.Quotient : Value.Remainder; } -/// \brief Check if a value looks like a hash. +/// Check if a value looks like a hash. /// /// The routine is expected to detect values computed using the most common hash /// algorithms. Typically, hash computations end with one of the following diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 5dc6068d4a0b..4d9c22e57a68 100644 --- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -389,12 +389,14 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee, // Inspect the arguments of the call site. If an argument's type doesn't // match the corresponding formal argument's type in the callee, bitcast it // to the correct type. 
- for (Use &U : CS.args()) { - unsigned ArgNo = CS.getArgumentNo(&U); - Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo); - Type *ActualTy = U.get()->getType(); + auto CalleeType = Callee->getFunctionType(); + auto CalleeParamNum = CalleeType->getNumParams(); + for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { + auto *Arg = CS.getArgument(ArgNo); + Type *FormalTy = CalleeType->getParamType(ArgNo); + Type *ActualTy = Arg->getType(); if (FormalTy != ActualTy) { - auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "", + auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "", CS.getInstruction()); CS.setArgument(ArgNo, Cast); } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 16af2c7b808b..61448e9acb57 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -31,7 +32,6 @@ #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <map> using namespace llvm; @@ -43,44 +43,36 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, DebugInfoFinder *DIFinder) { DenseMap<const MDNode *, MDNode *> Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); - if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + if (BB->hasName()) + NewBB->setName(BB->getName() + NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; Module *TheModule = F ? F->getParent() : nullptr; // Loop over all instructions, and copy them over. - for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) { - - if (DIFinder && TheModule) { - if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) - DIFinder->processDeclare(*TheModule, DDI); - else if (auto *DVI = dyn_cast<DbgValueInst>(II)) - DIFinder->processValue(*TheModule, DVI); + for (const Instruction &I : *BB) { + if (DIFinder && TheModule) + DIFinder->processInstruction(*TheModule, I); - if (auto DbgLoc = II->getDebugLoc()) - DIFinder->processLocation(*TheModule, DbgLoc.get()); - } - - Instruction *NewInst = II->clone(); - if (II->hasName()) - NewInst->setName(II->getName()+NameSuffix); + Instruction *NewInst = I.clone(); + if (I.hasName()) + NewInst->setName(I.getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); - VMap[&*II] = NewInst; // Add instruction map to value. + VMap[&I] = NewInst; // Add instruction map to value. 
- hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); - if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I)); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (isa<ConstantInt>(AI->getArraySize())) hasStaticAllocas = true; else hasDynamicAllocas = true; } } - + if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->getEntryBlock(); } return NewBB; @@ -175,7 +167,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, - SP ? &DIFinder : nullptr); + ModuleLevelChanges ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -197,15 +189,15 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } - for (DISubprogram *ISP : DIFinder.subprograms()) { - if (ISP != SP) { + for (DISubprogram *ISP : DIFinder.subprograms()) + if (ISP != SP) VMap.MD()[ISP].reset(ISP); - } - } - for (auto *Type : DIFinder.types()) { + for (DICompileUnit *CU : DIFinder.compile_units()) + VMap.MD()[CU].reset(CU); + + for (DIType *Type : DIFinder.types()) VMap.MD()[Type].reset(Type); - } // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. @@ -283,7 +275,7 @@ namespace { /// The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone); }; @@ -546,7 +538,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); - NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + NumPreds = pred_size(NewBB); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. @@ -718,7 +710,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } -/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. +/// Remaps instructions in \p Blocks using the mapping in \p VMap. void llvm::remapInstructionsInBlocks( const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { // Rewrite the code to refer to itself. @@ -728,7 +720,7 @@ void llvm::remapInstructionsInBlocks( RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } -/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p +/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. /// /// Updates LoopInfo and DominatorTree assuming the loop is dominated by block @@ -792,12 +784,13 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, return NewLoop; } -/// \brief Duplicate non-Phi instructions from the beginning of block up to +/// Duplicate non-Phi instructions from the beginning of block up to /// StopAt instruction into a split block between BB and its predecessor. 
BasicBlock * llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, - ValueToValueMapTy &ValueMapping) { + ValueToValueMapTy &ValueMapping, + DominatorTree *DT) { // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. @@ -805,13 +798,15 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - BasicBlock *NewBB = SplitEdge(PredBB, BB); + BasicBlock *NewBB = SplitEdge(PredBB, BB, DT); NewBB->setName(PredBB->getName() + ".split"); Instruction *NewTerm = NewBB->getTerminator(); // Clone the non-phi instructions of BB into NewBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. - for (; StopAt != &*BI; ++BI) { + // Stop once we see the terminator too. This covers the case where BB's + // terminator gets replaced and StopAt == BB's terminator. + for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { Instruction *New = BI->clone(); New->setName(BI->getName()); New->insertBefore(NewTerm); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 8fee10854229..35c7511a24b9 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -32,33 +32,34 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { /// copies of global variables and functions, and making their (initializers and /// references, respectively) refer to the right globals. /// -std::unique_ptr<Module> llvm::CloneModule(const Module *M) { +std::unique_ptr<Module> llvm::CloneModule(const Module &M) { // Create the value map that maps things from the old module over to the new // module. ValueToValueMapTy VMap; return CloneModule(M, VMap); } -std::unique_ptr<Module> llvm::CloneModule(const Module *M, +std::unique_ptr<Module> llvm::CloneModule(const Module &M, ValueToValueMapTy &VMap) { return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); } std::unique_ptr<Module> llvm::CloneModule( - const Module *M, ValueToValueMapTy &VMap, + const Module &M, ValueToValueMapTy &VMap, function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) { // First off, we need to create the new module. std::unique_ptr<Module> New = - llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext()); - New->setDataLayout(M->getDataLayout()); - New->setTargetTriple(M->getTargetTriple()); - New->setModuleInlineAsm(M->getModuleInlineAsm()); - + llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext()); + New->setSourceFileName(M.getSourceFileName()); + New->setDataLayout(M.getDataLayout()); + New->setTargetTriple(M.getTargetTriple()); + New->setModuleInlineAsm(M.getModuleInlineAsm()); + // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. 
// - for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { GlobalVariable *GV = new GlobalVariable(*New, I->getValueType(), @@ -72,7 +73,7 @@ std::unique_ptr<Module> llvm::CloneModule( } // Loop over the functions in the module, making external functions as before - for (const Function &I : *M) { + for (const Function &I : M) { Function *NF = Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(), I.getName(), New.get()); NF->copyAttributesFrom(&I); @@ -80,7 +81,7 @@ std::unique_ptr<Module> llvm::CloneModule( } // Loop over the aliases in the module - for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { if (!ShouldCloneDefinition(&*I)) { // An alias cannot act as an external reference, so we need to create @@ -114,7 +115,7 @@ std::unique_ptr<Module> llvm::CloneModule( // have been created, loop through and copy the global variable referrers // over... We also set the attributes on the global now. // - for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->isDeclaration()) continue; @@ -139,7 +140,7 @@ std::unique_ptr<Module> llvm::CloneModule( // Similarly, copy over function bodies now... // - for (const Function &I : *M) { + for (const Function &I : M) { if (I.isDeclaration()) continue; @@ -169,7 +170,7 @@ std::unique_ptr<Module> llvm::CloneModule( } // And aliases - for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { // We already dealt with undefined aliases above. if (!ShouldCloneDefinition(&*I)) @@ -180,8 +181,9 @@ std::unique_ptr<Module> llvm::CloneModule( } // And named metadata.... - for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), - E = M->named_metadata_end(); I != E; ++I) { + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); + I != E; ++I) { const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) @@ -194,7 +196,7 @@ std::unique_ptr<Module> llvm::CloneModule( extern "C" { LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(unwrap(M)).release()); + return wrap(CloneModule(*unwrap(M)).release()); } } diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7a404241cb14..f31dab9f96af 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -66,6 +66,7 @@ #include <vector> using namespace llvm; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "code-extractor" @@ -77,12 +78,10 @@ static cl::opt<bool> AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, cl::desc("Aggregate arguments to code-extracted functions")); -/// \brief Test whether a block is valid for extraction. -bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, - bool AllowVarArgs) { - // Landing pads must be in the function where they were inserted for cleanup. - if (BB.isEHPad()) - return false; +/// Test whether a block is valid for extraction. 
+static bool isBlockValidForExtraction(const BasicBlock &BB, + const SetVector<BasicBlock *> &Result, + bool AllowVarArgs, bool AllowAlloca) { // taking the address of a basic block moved to another function is illegal if (BB.hasAddressTaken()) return false; @@ -111,11 +110,63 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, } } - // Don't hoist code containing allocas or invokes. If explicitly requested, - // allow vastart. + // If explicitly requested, allow vastart and alloca. For invoke instructions + // verify that extraction is valid. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) - return false; + if (isa<AllocaInst>(I)) { + if (!AllowAlloca) + return false; + continue; + } + + if (const auto *II = dyn_cast<InvokeInst>(I)) { + // Unwind destination (either a landingpad, catchswitch, or cleanuppad) + // must be a part of the subgraph which is being extracted. + if (auto *UBB = II->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + + // All catch handlers of a catchswitch instruction as well as the unwind + // destination must be in the subgraph. + if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) { + if (auto *UBB = CSI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + for (auto *HBB : CSI->handlers()) + if (!Result.count(const_cast<BasicBlock*>(HBB))) + return false; + continue; + } + + // Make sure that entire catch handler is within subgraph. It is sufficient + // to check that catch return's block is in the list. + if (const auto *CPI = dyn_cast<CatchPadInst>(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast<CatchReturnInst>(U)) + if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) + return false; + continue; + } + + // And do similar checks for cleanup handler - the entire handler must be + // in subgraph which is going to be extracted. For cleanup return should + // additionally check that the unwind destination is also in the subgraph. + if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) + if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) + return false; + continue; + } + if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) { + if (auto *UBB = CRI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::vastart) { @@ -129,10 +180,10 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, return true; } -/// \brief Build a set of blocks to extract if the input blocks are viable. +/// Build a set of blocks to extract if the input blocks are viable. 
static SetVector<BasicBlock *> buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, - bool AllowVarArgs) { + bool AllowVarArgs, bool AllowAlloca) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector<BasicBlock *> Result; @@ -145,32 +196,42 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { - Result.clear(); - return Result; - } } -#ifndef NDEBUG - for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), - E = Result.end(); - I != E; ++I) - for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); - PI != PE; ++PI) - assert(Result.count(*PI) && - "No blocks in this region may have entries from outside the region" - " except for the first block!"); -#endif + for (auto *BB : Result) { + if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) + return {}; + + // Make sure that the first block is not a landing pad. + if (BB == Result.front()) { + if (BB->isEHPad()) { + LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n"); + return {}; + } + continue; + } + + // All blocks other than the first must not have predecessors outside of + // the subgraph which is being extracted. + for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + LLVM_DEBUG( + dbgs() << "No blocks in this region may have entries from " + "outside the region except for the first block!\n"); + return {}; + } + } return Result; } CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, bool AllowVarArgs) + BranchProbabilityInfo *BPI, bool AllowVarArgs, + bool AllowAlloca) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AllowVarArgs(AllowVarArgs), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, @@ -178,7 +239,8 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, - /* AllowVarArgs */ false)) {} + /* AllowVarArgs */ false, + /* AllowAlloca */ false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. @@ -562,8 +624,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, BasicBlock *newHeader, Function *oldFunction, Module *M) { - DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); - DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { @@ -577,20 +639,20 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Add the types of the input values to the function's argument list for (Value *value : inputs) { - DEBUG(dbgs() << "value used in func: " << *value << "\n"); + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. 
for (Value *output : outputs) { - DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); if (AggregateArgs) paramTy.push_back(output->getType()); else paramTy.push_back(PointerType::getUnqual(output->getType())); } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Function type: " << *RetTy << " f("; for (Type *i : paramTy) dbgs() << *i << ", "; @@ -620,16 +682,89 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, if (oldFunction->hasUWTable()) newFunction->setHasUWTable(); - // Inherit all of the target dependent attributes. + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. // (e.g. If the extracted region contains a call to an x86.sse // instruction we need to make sure that the extracted region has the // "target-features" attribute allowing it to be lowered. // FIXME: This should be changed to check to see if a specific // attribute can not be inherited. - AttrBuilder AB(oldFunction->getAttributes().getFnAttributes()); - for (const auto &Attr : AB.td_attrs()) - newFunction->addFnAttr(Attr.first, Attr.second); + for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. + case Attribute::Alignment: + case Attribute::AllocSize: + case Attribute::ArgMemOnly: + case Attribute::Builtin: + case Attribute::ByVal: + case Attribute::Convergent: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoReturn: + case Attribute::None: + case Attribute::NonNull: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::ReturnsTwice: + case Attribute::SExt: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::WriteOnly: + case Attribute::ZExt: + case Attribute::EndAttrKinds: + continue; + // Those attributes should be safe to propagate to the extracted function. 
+ case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoDuplicate: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::OptForFuzzing: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::ShadowCallStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + case Attribute::NoCfCheck: + break; + } + newFunction->addFnAttr(Attr); + } newFunction->getBasicBlockList().push_back(newRootNode); // Create an iterator to name all of the arguments we inserted. @@ -1093,10 +1228,10 @@ Function *CodeExtractor::extractCodeRegion() { // Update the entry count of the function. if (BFI) { - Optional<uint64_t> EntryCount = - BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (EntryCount.hasValue()) - newFunction->setEntryCount(EntryCount.getValue()); + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } @@ -1104,6 +1239,10 @@ Function *CodeExtractor::extractCodeRegion() { moveCodeToFunction(newFunction); + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + // Update the branch weights for the exit block. if (BFI && NumExitBlocks > 1) calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); @@ -1139,7 +1278,7 @@ Function *CodeExtractor::extractCodeRegion() { } } - DEBUG(if (verifyFunction(*newFunction)) - report_fatal_error("verifyFunction failed!")); + LLVM_DEBUG(if (verifyFunction(*newFunction)) + report_fatal_error("verifyFunction failed!")); return newFunction; } diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp index 82b67c293102..9a0240144d08 100644 --- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -138,7 +138,7 @@ bool optimizeGlobalCtorsList(Module &M, if (!F) continue; - DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); // We cannot simplify external ctor functions. 
if (F->empty()) diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 6d3d287defdb..56ff03c7f5e1 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -9,11 +9,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an diff --git a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 421663f82565..569ea58a3047 100644 --- a/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -9,14 +9,13 @@ #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; static void insertCall(Function &CurFn, StringRef Func, @@ -92,17 +91,27 @@ static bool runOnFunction(Function &F, bool PostInlining) { if (!ExitFunc.empty()) { for (BasicBlock &BB : F) { - TerminatorInst *T = BB.getTerminator(); + Instruction *T = BB.getTerminator(); + if (!isa<ReturnInst>(T)) + continue; + + // If T is preceded by a musttail call, that's the real terminator. 
+ Instruction *Prev = T->getPrevNode(); + if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev)) + Prev = BCI->getPrevNode(); + if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) { + if (CI->isMustTailCall()) + T = CI; + } + DebugLoc DL; if (DebugLoc TerminatorDL = T->getDebugLoc()) DL = TerminatorDL; else if (auto SP = F.getSubprogram()) DL = DebugLoc::get(0, 0, SP); - if (isa<ReturnInst>(T)) { - insertCall(F, ExitFunc, T, DL); - Changed = true; - } + insertCall(F, ExitFunc, T, DL); + Changed = true; } F.removeAttribute(AttributeList::FunctionIndex, ExitAttr); } diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index 78d7474e5b95..c9c96fbe5da0 100644 --- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -14,9 +14,9 @@ #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; static Constant *getDefaultPersonalityFn(Module *M) { @@ -73,8 +73,8 @@ IRBuilder<> *EscapeEnumerator::Next() { F.setPersonalityFn(PersFn); } - if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { - report_fatal_error("Funclet EH not supported"); + if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { + report_fatal_error("Scoped EH not supported"); } LandingPadInst *LPad = diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp index 3c5e299fae98..7fd9425efed3 100644 --- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" @@ -174,6 +175,11 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } +static Constant *getInitializer(Constant *C) { + auto *GV = dyn_cast<GlobalVariable>(C); + return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; +} + /// Return the value that would be computed by a load from P after the stores /// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P) { @@ -189,18 +195,96 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return nullptr; } - // Handle a constantexpr getelementptr. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) - if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0))) { - GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - if (GV->hasDefinitiveInitializer()) - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) { + switch (CE->getOpcode()) { + // Handle a constantexpr getelementptr. + case Instruction::GetElementPtr: + if (auto *I = getInitializer(CE->getOperand(0))) + return ConstantFoldLoadThroughGEPConstantExpr(I, CE); + break; + // Handle a constantexpr bitcast. + case Instruction::BitCast: + Constant *Val = getVal(CE->getOperand(0)); + auto MM = MutatedMemory.find(Val); + auto *I = (MM != MutatedMemory.end()) ? 
MM->second + : getInitializer(CE->getOperand(0)); + if (I) + return ConstantFoldLoadThroughBitcast( + I, P->getType()->getPointerElementType(), DL); + break; } + } return nullptr; // don't know how to evaluate. } +static Function *getFunction(Constant *C) { + if (auto *Fn = dyn_cast<Function>(C)) + return Fn; + + if (auto *Alias = dyn_cast<GlobalAlias>(C)) + if (auto *Fn = dyn_cast<Function>(Alias->getAliasee())) + return Fn; + return nullptr; +} + +Function * +Evaluator::getCalleeWithFormalArgs(CallSite &CS, + SmallVector<Constant *, 8> &Formals) { + auto *V = CS.getCalledValue(); + if (auto *Fn = getFunction(getVal(V))) + return getFormalParams(CS, Fn, Formals) ? Fn : nullptr; + + auto *CE = dyn_cast<ConstantExpr>(V); + if (!CE || CE->getOpcode() != Instruction::BitCast || + !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals)) + return nullptr; + + return dyn_cast<Function>( + ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL)); +} + +bool Evaluator::getFormalParams(CallSite &CS, Function *F, + SmallVector<Constant *, 8> &Formals) { + if (!F) + return false; + + auto *FTy = F->getFunctionType(); + if (FTy->getNumParams() > CS.getNumArgOperands()) { + LLVM_DEBUG(dbgs() << "Too few arguments for function.\n"); + return false; + } + + auto ArgI = CS.arg_begin(); + for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE; + ++ParI) { + auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL); + if (!ArgC) { + LLVM_DEBUG(dbgs() << "Can not convert function argument.\n"); + return false; + } + Formals.push_back(ArgC); + ++ArgI; + } + return true; +} + +/// If call expression contains bitcast then we may need to cast +/// evaluated return value to a type of the call expression. +Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr); + if (!RV || !CE || CE->getOpcode() != Instruction::BitCast) + return RV; + + if (auto *FT = + dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) { + RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL); + if (!RV) + LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n"); + } + return RV; +} + /// Evaluate all instructions in block BB, returning true if successful, false /// if we can't evaluate it. NewBB returns the next BB that control flows into, /// or null upon return. @@ -210,22 +294,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, while (true) { Constant *InstResult = nullptr; - DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (!SI->isSimple()) { - DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(SI->getOperand(1)); if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { - DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); Ptr = FoldedPtr; - DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } if (!isSimpleEnoughPointerToCommit(Ptr)) { // If this is too complex for us to commit, reject it. 
- DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + LLVM_DEBUG( + dbgs() << "Pointer is too complex for us to evaluate store."); return false; } @@ -234,14 +319,15 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { - DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val - << "\n"); + LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. " + << *Val << "\n"); return false; } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { if (CE->getOpcode() == Instruction::BitCast) { - DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + LLVM_DEBUG(dbgs() + << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. @@ -252,7 +338,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // In order to push the bitcast onto the stored value, a bitcast // from NewTy to Val's type must be legal. If it's not, we can try // introspecting NewTy to find a legal conversion. - while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + Constant *NewVal; + while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) { // If NewTy is a struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. @@ -270,17 +357,14 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we can't improve the situation by introspecting NewTy, // we have to give up. } else { - DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); + LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); return false; } } - // If we found compatible types, go ahead and push the bitcast - // onto the stored value. - Val = ConstantExpr::getBitCast(Val, NewTy); - - DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + Val = NewVal; + LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); } } @@ -289,37 +373,37 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), getVal(BO->getOperand(1))); - DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " + << *InstResult << "\n"); } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), getVal(CI->getOperand(0)), getVal(CI->getOperand(1))); - DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), getVal(CI->getOperand(0)), CI->getType()); - DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), getVal(SI->getOperand(1)), getVal(SI->getOperand(2))); - DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found a Select! 
Simplifying: " << *InstResult + << "\n"); } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { InstResult = ConstantExpr::getExtractValue( getVal(EVI->getAggregateOperand()), EVI->getIndices()); - DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " + << *InstResult << "\n"); } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { InstResult = ConstantExpr::getInsertValue( getVal(IVI->getAggregateOperand()), getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); - DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " + << *InstResult << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector<Constant*, 8> GEPOps; @@ -329,60 +413,63 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, cast<GEPOperator>(GEP)->isInBounds()); - DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult - << "\n"); + LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { if (!LI->isSimple()) { - DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + LLVM_DEBUG( + dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(LI->getOperand(0)); if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { Ptr = FoldedPtr; - DEBUG(dbgs() << "Found a constant pointer expression, constant " - "folding: " << *Ptr << "\n"); + LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " + << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); if (!InstResult) { - DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." - "\n"); + LLVM_DEBUG( + dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); return false; // Could not evaluate load. } - DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) { - DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); AllocaTmps.push_back(llvm::make_unique<GlobalVariable>( Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), - AI->getName())); + AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, + AI->getType()->getPointerAddressSpace())); InstResult = AllocaTmps.back().get(); - DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { - DEBUG(dbgs() << "Ignoring debug info.\n"); + LLVM_DEBUG(dbgs() << "Ignoring debug info.\n"); ++CurInst; continue; } // Cannot handle inline asm. 
if (isa<InlineAsm>(CS.getCalledValue())) { - DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); return false; } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { if (MSI->isVolatile()) { - DEBUG(dbgs() << "Can not optimize a volatile memset " << - "intrinsic.\n"); + LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset " + << "intrinsic.\n"); return false; } Constant *Ptr = getVal(MSI->getDest()); @@ -390,7 +477,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *DestVal = ComputeLoadResult(getVal(Ptr)); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. - DEBUG(dbgs() << "Ignoring no-op memset.\n"); + LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n"); ++CurInst; continue; } @@ -398,7 +485,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { - DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); ++CurInst; continue; } @@ -407,7 +494,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // We don't insert an entry into Values, as it doesn't have a // meaningful return value. if (!II->use_empty()) { - DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); + LLVM_DEBUG(dbgs() + << "Found unused invariant_start. Can't evaluate.\n"); return false; } ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); @@ -419,54 +507,54 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); - DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV - << "\n"); + LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: " + << *GV << "\n"); } else { - DEBUG(dbgs() << "Found a global var, but can not treat it as an " - "invariant.\n"); + LLVM_DEBUG(dbgs() + << "Found a global var, but can not treat it as an " + "invariant.\n"); } } // Continue even if we do nothing. ++CurInst; continue; } else if (II->getIntrinsicID() == Intrinsic::assume) { - DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n"); ++CurInst; continue; } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { - DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); + LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); ++CurInst; continue; } - DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); + LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); return false; } // Resolve function pointers. - Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); + SmallVector<Constant *, 8> Formals; + Function *Callee = getCalleeWithFormalArgs(CS, Formals); if (!Callee || Callee->isInterposable()) { - DEBUG(dbgs() << "Can not resolve function pointer.\n"); + LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. } - SmallVector<Constant*, 8> Formals; - for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) - Formals.push_back(getVal(*i)); - if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { - InstResult = C; - DEBUG(dbgs() << "Constant folded function call. 
Result: " << - *InstResult << "\n"); + InstResult = castCallResultIfNeeded(CS.getCalledValue(), C); + if (!InstResult) + return false; + LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " + << *InstResult << "\n"); } else { - DEBUG(dbgs() << "Can not constant fold function call.\n"); + LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; } } else { if (Callee->getFunctionType()->isVarArg()) { - DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); return false; } @@ -474,21 +562,24 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // Execute the call, if successful, use the return value. ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { - DEBUG(dbgs() << "Failed to evaluate function.\n"); + LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } ValueStack.pop_back(); - InstResult = RetVal; + InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal); + if (RetVal && !InstResult) + return false; if (InstResult) { - DEBUG(dbgs() << "Successfully evaluated function. Result: " - << *InstResult << "\n\n"); + LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " + << *InstResult << "\n\n"); } else { - DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + LLVM_DEBUG(dbgs() + << "Successfully evaluated function. Result: 0\n\n"); } } } else if (isa<TerminatorInst>(CurInst)) { - DEBUG(dbgs() << "Found a terminator instruction.\n"); + LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n"); if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { if (BI->isUnconditional()) { @@ -515,17 +606,18 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. - DEBUG(dbgs() << "Can not handle terminator."); + LLVM_DEBUG(dbgs() << "Can not handle terminator."); return false; // Cannot handle this terminator. } // We succeeded at evaluating this block! - DEBUG(dbgs() << "Successfully evaluated block.\n"); + LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n"); return true; } else { // Did not know how to evaluate this! - DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." - "\n"); + LLVM_DEBUG( + dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); return false; } @@ -539,7 +631,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If we just processed an invoke, we finished evaluating the block. if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { NextBB = II->getNormalDest(); - DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); return true; } @@ -578,7 +670,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, while (true) { BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. 
- DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); + LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); if (!EvaluateBlock(CurInst, NextBB)) return false; diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 5fdcc6d1d727..3c6c9c9a5df4 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" @@ -24,7 +25,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include <cassert> using namespace llvm; @@ -36,16 +36,16 @@ namespace { class FlattenCFGOpt { AliasAnalysis *AA; - /// \brief Use parallel-and or parallel-or to generate conditions for + /// Use parallel-and or parallel-or to generate conditions for /// conditional branches. bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); - /// \brief If \param BB is the merge block of an if-region, attempt to merge + /// If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); - /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which + /// Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical /// instructions, and have no memory reference alias with \p Head2. @@ -312,7 +312,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { new UnreachableInst(CB->getContext(), CB); } while (Iteration); - DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); return true; } @@ -469,7 +469,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { // Remove \param SecondEntryBlock SecondEntryBlock->dropAllReferences(); SecondEntryBlock->eraseFromParent(); - DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 75539428b688..69203f9f2485 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -377,7 +376,7 @@ int FunctionComparator::cmpConstants(const Constant *L, } } default: // Unknown constant, abort. 
- DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); + LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); llvm_unreachable("Constant ValueID not recognized."); return -1; } @@ -925,7 +924,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { H.add(F.arg_size()); SmallVector<const BasicBlock *, 8> BBs; - SmallSet<const BasicBlock *, 16> VisitedBBs; + SmallPtrSet<const BasicBlock *, 16> VisitedBBs; // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), // accumulating the hash of the function "structure." (BB and opcode sequence) diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 6b5f593073b4..479816a339d0 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -206,15 +206,10 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // definition. if (GV.hasName()) { ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); - if (VI) { - // Need to check all summaries are local in case of hash collisions. - bool IsLocal = VI.getSummaryList().size() && - llvm::all_of(VI.getSummaryList(), - [](const std::unique_ptr<GlobalValueSummary> &Summary) { - return Summary->isDSOLocal(); - }); - if (IsLocal) - GV.setDSOLocal(true); + if (VI && VI.isDSOLocal()) { + GV.setDSOLocal(true); + if (GV.hasDLLImportStorageClass()) + GV.setDLLStorageClass(GlobalValue::DefaultStorageClass); } } diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 245fefb38ee8..ff6970db47da 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -60,7 +60,7 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { } static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, - SmallPtrSetImpl<const PHINode *> &PhiUsers) { + SmallPtrSetImpl<const Value *> &VisitedUsers) { if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) if (GV->isExternallyInitialized()) GS.StoredType = GlobalStatus::StoredOnce; @@ -75,7 +75,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (!isa<PointerType>(CE->getType())) return true; - if (analyzeGlobalAux(CE, GS, PhiUsers)) + // FIXME: Do we need to add constexpr selects to VisitedUsers? + if (analyzeGlobalAux(CE, GS, VisitedUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { if (!GS.HasMultipleAccessingFunctions) { @@ -137,20 +138,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GS.StoredType = GlobalStatus::Stored; } } - } else if (isa<BitCastInst>(I)) { - if (analyzeGlobalAux(I, GS, PhiUsers)) + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + // Skip over bitcasts and GEPs; we don't care about the type or offset + // of the pointer. + if (analyzeGlobalAux(I, GS, VisitedUsers)) return true; - } else if (isa<GetElementPtrInst>(I)) { - if (analyzeGlobalAux(I, GS, PhiUsers)) - return true; - } else if (isa<SelectInst>(I)) { - if (analyzeGlobalAux(I, GS, PhiUsers)) - return true; - } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { - // PHI nodes we can check just like select or GEP instructions, but we - // have to be careful about infinite recursion. - if (PhiUsers.insert(PN).second) // Not already visited. 
- if (analyzeGlobalAux(I, GS, PhiUsers)) + } else if (isa<SelectInst>(I) || isa<PHINode>(I)) { + // Look through selects and PHIs to find if the pointer is + // conditionally accessed. Make sure we only visit an instruction + // once; otherwise, we can get infinite recursion or exponential + // compile time. + if (VisitedUsers.insert(I).second) + if (analyzeGlobalAux(I, GS, VisitedUsers)) return true; } else if (isa<CmpInst>(I)) { GS.IsCompared = true; @@ -191,6 +190,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GlobalStatus::GlobalStatus() = default; bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { - SmallPtrSet<const PHINode *, 16> PhiUsers; - return analyzeGlobalAux(V, GS, PhiUsers); + SmallPtrSet<const Value *, 16> VisitedUsers; + return analyzeGlobalAux(V, GS, VisitedUsers); } diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index b8c12ad5ea84..8382220fc9e1 100644 --- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) { void ImportedFunctionsInliningStatistics::calculateRealInlines() { // Removing duplicated Callers. - std::sort(NonImportedCallers.begin(), NonImportedCallers.end()); + llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end()); NonImportedCallers.erase( std::unique(NonImportedCallers.begin(), NonImportedCallers.end()), NonImportedCallers.end()); @@ -190,13 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() { for (const NodesMapTy::value_type& Node : NodesMap) SortedNodes.push_back(&Node); - std::sort( + llvm::sort( SortedNodes.begin(), SortedNodes.end(), [&](const SortedNodesTy::value_type &Lhs, const SortedNodesTy::value_type &Rhs) { if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines) return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; - if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) + if (Lhs->second->NumberOfRealInlines != + Rhs->second->NumberOfRealInlines) return Lhs->second->NumberOfRealInlines > Rhs->second->NumberOfRealInlines; return Lhs->first() < Rhs->first(); diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index fedf6e100d6c..0315aac1cf84 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" @@ -60,7 +61,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -72,6 +72,7 @@ #include <vector> using namespace llvm; +using ProfileCount = Function::ProfileCount; static cl::opt<bool> EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), @@ -1247,7 +1248,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, // Always generate a memcpy of alignment 1 here because we don't 
know // the alignment of the src pointer. Other optimizations can infer // better alignment. - Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); + Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size); } /// When inlining a call site that has a byval argument, @@ -1431,29 +1432,29 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, - const Optional<uint64_t> &CalleeEntryCount, + const ProfileCount &CalleeEntryCount, const Instruction *TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { - if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) + if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || + CalleeEntryCount.getCount() < 1) return; - Optional<uint64_t> CallSiteCount = - PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; + auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, - CalleeEntryCount.getValue()); + CalleeEntryCount.getCount()); for (auto const &Entry : VMap) if (isa<CallInst>(Entry.first)) if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) - CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); + CI->updateProfWeight(CallCount, CalleeEntryCount.getCount()); for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (VMap.count(&BB)) for (Instruction &I : BB) if (CallInst *CI = dyn_cast<CallInst>(&I)) - CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, - CalleeEntryCount.getValue()); + CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount, + CalleeEntryCount.getCount()); } /// Update the entry count of callee after inlining. @@ -1467,18 +1468,19 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, // callsite is M, the new callee count is set to N - M. M is estimated from // the caller's entry count, its entry block frequency and the block frequency // of the callsite. - Optional<uint64_t> CalleeCount = Callee->getEntryCount(); + auto CalleeCount = Callee->getEntryCount(); if (!CalleeCount.hasValue() || !PSI) return; - Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI); + auto CallCount = PSI->getProfileCount(CallInst, CallerBFI); if (!CallCount.hasValue()) return; // Since CallSiteCount is an estimate, it could exceed the original callee // count and has to be set to 0. - if (CallCount.getValue() > CalleeCount.getValue()) - Callee->setEntryCount(0); + if (CallCount.getValue() > CalleeCount.getCount()) + CalleeCount.setCount(0); else - Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); + CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue()); + Callee->setEntryCount(CalleeCount); } /// This function inlines the called function into the basic block of the @@ -1500,10 +1502,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, IFI.reset(); Function *CalledFunc = CS.getCalledFunction(); - if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || - (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! - return false; + if (!CalledFunc || // Can't inline external function or indirect + CalledFunc->isDeclaration()) // call! 
+ return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1568,7 +1569,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Instruction *CallSiteEHPad = nullptr; if (CallerPersonality) { EHPersonality Personality = classifyEHPersonality(CallerPersonality); - if (isFuncletEHPersonality(Personality)) { + if (isScopedEHPersonality(Personality)) { Optional<OperandBundleUse> ParentFunclet = CS.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) @@ -1630,9 +1631,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, auto &DL = Caller->getParent()->getDataLayout(); - assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && - "Varargs calls can only be inlined if the Varargs are forwarded!"); - // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); @@ -1815,9 +1813,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } SmallVector<Value*,4> VarArgsToForward; + SmallVector<AttributeSet, 4> VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); - i < CS.getNumArgOperands(); i++) + i < CS.getNumArgOperands(); i++) { VarArgsToForward.push_back(CS.getArgOperand(i)); + VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); + } bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { @@ -1825,6 +1826,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (CallInst *CI = dyn_cast<CallInst>(TheCall)) CallSiteTailKind = CI->getTailCallKind(); + // For inlining purposes, the "notail" marker is the same as no marker. + if (CallSiteTailKind == CallInst::TCK_NoTail) + CallSiteTailKind = CallInst::TCK_None; + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { for (auto II = BB->begin(); II != BB->end();) { @@ -1833,6 +1838,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (!CI) continue; + // Forward varargs from inlined call site to calls to the + // ForwardVarArgsTo function, if requested, and to musttail calls. + if (!VarArgsToForward.empty() && + ((ForwardVarArgsTo && + CI->getCalledFunction() == ForwardVarArgsTo) || + CI->isMustTailCall())) { + // Collect attributes for non-vararg parameters. + AttributeList Attrs = CI->getAttributes(); + SmallVector<AttributeSet, 8> ArgAttrs; + if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { + for (unsigned ArgNo = 0; + ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) + ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); + } + + // Add VarArg attributes. + ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); + Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), + Attrs.getRetAttributes(), ArgAttrs); + // Add VarArgs to existing parameters. + SmallVector<Value *, 6> Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *NewCI = + CallInst::Create(CI->getCalledFunction() ? 
CI->getCalledFunction() + : CI->getCalledValue(), + Params, "", CI); + NewCI->setDebugLoc(CI->getDebugLoc()); + NewCI->setAttributes(Attrs); + NewCI->setCallingConv(CI->getCallingConv()); + CI->replaceAllUsesWith(NewCI); + CI->eraseFromParent(); + CI = NewCI; + } + if (Function *F = CI->getCalledFunction()) InlinedDeoptimizeCalls |= F->getIntrinsicID() == Intrinsic::experimental_deoptimize; @@ -1850,6 +1889,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // f -> musttail g -> tail f ==> f -> tail f // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f + // + // Inlined notail calls should remain notail calls. CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); if (ChildTCK != CallInst::TCK_NoTail) ChildTCK = std::min(CallSiteTailKind, ChildTCK); @@ -1860,16 +1901,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // 'nounwind'. if (MarkNoUnwind) CI->setDoesNotThrow(); - - if (ForwardVarArgsTo && !VarArgsToForward.empty() && - CI->getCalledFunction() == ForwardVarArgsTo) { - SmallVector<Value*, 6> Params(CI->arg_operands()); - Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); - CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); - Call->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(Call); - CI->eraseFromParent(); - } } } } diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 23ec45edb3ef..003721f2b939 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -17,7 +17,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 5a90dcb033b2..3fbb3487884b 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -372,7 +372,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, /// information about the operands are known. Implements both 32bit and 64bit /// scalar division. /// -/// @brief Replace Rem with generated code. +/// Replace Rem with generated code. bool llvm::expandRemainder(BinaryOperator *Rem) { assert((Rem->getOpcode() == Instruction::SRem || Rem->getOpcode() == Instruction::URem) && @@ -430,7 +430,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { /// when more information about the operands are known. Implements both /// 32bit and 64bit scalar division. /// -/// @brief Replace Div with generated code. +/// Replace Div with generated code. bool llvm::expandDivision(BinaryOperator *Div) { assert((Div->getOpcode() == Instruction::SDiv || Div->getOpcode() == Instruction::UDiv) && @@ -482,7 +482,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { /// that have no or very little suppport for smaller than 32 bit integer /// arithmetic. /// -/// @brief Replace Rem with emulation code. +/// Replace Rem with emulation code. bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { assert((Rem->getOpcode() == Instruction::SRem || Rem->getOpcode() == Instruction::URem) && @@ -531,7 +531,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { /// 64 bits. Uses the above routines and extends the inputs/truncates the /// outputs to operate in 64 bits. 
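The InlineFunction.cpp hunk above generalizes vararg forwarding: trailing variadic operands (and their parameter attributes) now flow both to an explicit ForwardVarArgsTo target and to inlined musttail calls. A reduced sketch of the rebuild step, with the attribute merging elided (appendForwardedVarArgs is an illustrative name; the API calls are the ones visible in the hunk):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static CallInst *appendForwardedVarArgs(CallInst *CI,
                                            ArrayRef<Value *> VarArgs) {
      SmallVector<Value *, 6> Params(CI->arg_operands());
      Params.append(VarArgs.begin(), VarArgs.end());
      CallInst *NewCI = CallInst::Create(CI->getCalledValue(), Params, "", CI);
      NewCI->setDebugLoc(CI->getDebugLoc());
      NewCI->setCallingConv(CI->getCallingConv());
      CI->replaceAllUsesWith(NewCI);  // Users now see the widened call.
      CI->eraseFromParent();
      return NewCI;
    }

Rebuilding the CallInst is unavoidable here: a call's operand count is fixed at creation, so arguments cannot be appended in place.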
/// -/// @brief Replace Rem with emulation code. +/// Replace Rem with emulation code. bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { assert((Rem->getOpcode() == Instruction::SRem || Rem->getOpcode() == Instruction::URem) && @@ -580,7 +580,7 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { /// in 32 bits; that is, these routines are good for targets that have no /// or very little support for smaller than 32 bit integer arithmetic. /// -/// @brief Replace Div with emulation code. +/// Replace Div with emulation code. bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { assert((Div->getOpcode() == Instruction::SDiv || Div->getOpcode() == Instruction::UDiv) && @@ -628,7 +628,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { /// above routines and extends the inputs/truncates the outputs to operate /// in 64 bits. /// -/// @brief Replace Div with emulation code. +/// Replace Div with emulation code. bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { assert((Div->getOpcode() == Instruction::SDiv || Div->getOpcode() == Instruction::UDiv) && diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index ae0e2bb6c280..956d0387c7a8 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -36,13 +36,14 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -214,18 +215,27 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have - // any uses rewritten. + // any uses rewritten. If the new PHI is used, store it so that we can + // try to propagate dbg.value intrinsics to it. + SmallVector<PHINode *, 2> NeedDbgValues; for (PHINode *PN : AddedPHIs) if (PN->use_empty()) PHIsToRemove.insert(PN); - + else + NeedDbgValues.push_back(PN); + insertDebugValuesForPHIs(InstBB, NeedDbgValues); Changed = true; } - // Remove PHI nodes that did not have any uses rewritten. - for (PHINode *PN : PHIsToRemove) { - assert (PN->use_empty() && "Trying to remove a phi with uses."); - PN->eraseFromParent(); - } + // Remove PHI nodes that did not have any uses rewritten. We need to redo the + // use_empty() check here, because even if the PHI node wasn't used when added + // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is + // not guaranteed to handle trees/cycles of PHI nodes that only are used by + // each other. Such situations has only been noticed when the input IR + // contains unreachable code, and leaving some extra redundant PHI nodes in + // such situations is considered a minor problem. 
+ for (PHINode *PN : PHIsToRemove) + if (PN->use_empty()) + PN->eraseFromParent(); return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 42aca757c2af..9832a6f24e1f 100644 --- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -79,11 +79,11 @@ public: bool perform() { bool Changed = false; for (auto &CI : WorkList) { - DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() + << "\n"); if (perform(CI)) { Changed = true; - DEBUG(dbgs() << "Transformed\n"); + LLVM_DEBUG(dbgs() << "Transformed\n"); } } return Changed; @@ -421,7 +421,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, const LibFunc &Func) { // FIXME: LibFunc_powf and powl TBD. if (Func != LibFunc_pow) { - DEBUG(dbgs() << "Not handled powf() and powl()\n"); + LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n"); return nullptr; } @@ -433,7 +433,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) { double D = CF->getValueAPF().convertToDouble(); if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) { - DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); + LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); return nullptr; } @@ -447,7 +447,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // If the Base value coming from an integer type. Instruction *I = dyn_cast<Instruction>(Base); if (!I) { - DEBUG(dbgs() << "Not handled pow(): FP type base\n"); + LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n"); return nullptr; } unsigned Opcode = I->getOpcode(); @@ -461,7 +461,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, else if (BW == 32) UpperV = 32.0f; else { - DEBUG(dbgs() << "Not handled pow(): type too wide\n"); + LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n"); return nullptr; } @@ -477,7 +477,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0); return BBBuilder.CreateOr(Cond0, Cond); } - DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); + LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); return nullptr; } @@ -496,9 +496,9 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); - DEBUG(dbgs() << "== Basic Block After =="); - DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB - << *CallBB->getSingleSuccessor() << "\n"); + LLVM_DEBUG(dbgs() << "== Basic Block After =="); + LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB + << *CallBB->getSingleSuccessor() << "\n"); } // Perform the transformation to a single candidate. @@ -529,10 +529,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, bool Changed = CCDCE.perform(); // Verify the dominator after we've updated it locally. 
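For context on LibCallsShrinkWrap: the pass guards an errno-writing libcall with a domain test so the call only executes when it might actually set errno. A hedged sketch of the splitting step behind shrinkWrapCI (guardCall is an illustrative name; the real pass additionally attaches branch weights, and only transforms calls whose results are unused):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Cond is true when the argument may lie outside the libcall's domain.
    static void guardCall(CallInst *CI, Value *Cond) {
      TerminatorInst *ThenTerm =
          SplitBlockAndInsertIfThen(Cond, CI, /*Unreachable=*/false);
      CI->removeFromParent();     // Run the call only on the guarded path;
      CI->insertBefore(ThenTerm); // assumes CI's result has no uses.
    }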
-#ifndef NDEBUG - if (DT) - DT->verifyDomTree(); -#endif + assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast)); return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index acccf7abf808..ae3cb077a3af 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> #include <climits> @@ -100,7 +101,8 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + DeferredDominance *DDT) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -123,6 +125,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Replace the conditional branch with an unconditional one. Builder.CreateBr(Destination); BI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, OldDest); return true; } @@ -193,9 +197,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, createBranchWeights(Weights)); } // Remove this entry. - DefaultDest->removePredecessor(SI->getParent()); + BasicBlock *ParentBB = SI->getParent(); + DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); + if (DDT) + DDT->deleteEdge(ParentBB, DefaultDest); continue; } @@ -221,14 +228,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Insert the new branch. Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); + std::vector <DominatorTree::UpdateType> Updates; + if (DDT) + Updates.reserve(SI->getNumSuccessors() - 1); // Remove entries from PHI nodes which we no longer branch to... for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - if (Succ == TheOnlyDest) + if (Succ == TheOnlyDest) { TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest - else + } else { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } } // Delete the old switch. @@ -236,6 +249,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + if (DDT) + DDT->applyUpdates(Updates); return true; } @@ -281,14 +296,23 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (auto *BA = dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); + std::vector <DominatorTree::UpdateType> Updates; + if (DDT) + Updates.reserve(IBI->getNumDestinations() - 1); + // Insert the new branch. 
Builder.CreateBr(TheOnlyDest); for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - if (IBI->getDestination(i) == TheOnlyDest) + if (IBI->getDestination(i) == TheOnlyDest) { TheOnlyDest = nullptr; - else - IBI->getDestination(i)->removePredecessor(IBI->getParent()); + } else { + BasicBlock *ParentBB = IBI->getParent(); + BasicBlock *DestBB = IBI->getDestination(i); + DestBB->removePredecessor(ParentBB); + if (DDT) + Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); + } } Value *Address = IBI->getAddress(); IBI->eraseFromParent(); @@ -303,6 +327,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, new UnreachableInst(BB->getContext(), BB); } + if (DDT) + DDT->applyUpdates(Updates); return true; } } @@ -346,6 +372,11 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return false; return true; } + if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) { + if (DLI->getLabel()) + return false; + return true; + } if (!I->mayHaveSideEffects()) return true; @@ -353,8 +384,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, // Special case intrinsics that "may have side effects" but can be deleted // when dead. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - // Safe to delete llvm.stacksave if dead. - if (II->getIntrinsicID() == Intrinsic::stacksave) + // Safe to delete llvm.stacksave and launder.invariant.group if dead. + if (II->getIntrinsicID() == Intrinsic::stacksave || + II->getIntrinsicID() == Intrinsic::launder_invariant_group) return true; // Lifetime intrinsics are dead when their right-hand is undef. @@ -402,17 +434,31 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, SmallVector<Instruction*, 16> DeadInsts; DeadInsts.push_back(I); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI); - do { - I = DeadInsts.pop_back_val(); + return true; +} + +void llvm::RecursivelyDeleteTriviallyDeadInstructions( + SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) { + // Process the dead instruction list until empty. + while (!DeadInsts.empty()) { + Instruction &I = *DeadInsts.pop_back_val(); + assert(I.use_empty() && "Instructions with uses are not dead."); + assert(isInstructionTriviallyDead(&I, TLI) && + "Live instruction found in dead worklist!"); + + // Don't lose the debug info while deleting the instructions. + salvageDebugInfo(I); // Null out all of the instruction's operands to see if any operand becomes // dead as we go. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *OpV = I->getOperand(i); - I->setOperand(i, nullptr); + for (Use &OpU : I.operands()) { + Value *OpV = OpU.get(); + OpU.set(nullptr); - if (!OpV->use_empty()) continue; + if (!OpV->use_empty()) + continue; // If the operand is an instruction that became dead as we nulled out the // operand, and if it is 'trivially' dead, delete it in a future loop @@ -422,10 +468,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, DeadInsts.push_back(OpI); } - I->eraseFromParent(); - } while (!DeadInsts.empty()); - - return true; + I.eraseFromParent(); + } } /// areAllUsesEqual - Check whether the uses of a value are all the same. @@ -477,6 +521,8 @@ simplifyAndDCEInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (isInstructionTriviallyDead(I, TLI)) { + salvageDebugInfo(*I); + // Null out all of the instruction's operands to see if any operand becomes // dead as we go. 
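The new RecursivelyDeleteTriviallyDeadInstructions overload above takes a caller-filled worklist and salvages debug info before each erase. A hedged usage sketch (eraseDeadBatch and Candidates are illustrative; the entry points are the ones declared in the hunk):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Queue up instructions that are already trivially dead, then let the
    // utility cascade through operands that become dead as a result.
    static void eraseDeadBatch(ArrayRef<Instruction *> Candidates,
                               const TargetLibraryInfo *TLI) {
      SmallVector<Instruction *, 16> DeadInsts;
      for (Instruction *I : Candidates)
        if (I->use_empty() && isInstructionTriviallyDead(I, TLI))
          DeadInsts.push_back(I);
      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
    }

Note the asserts in the new overload: every queued instruction must already be use-free and trivially dead, which is why the filter runs before push_back.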
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -579,7 +625,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DeferredDominance *DDT) { // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; @@ -602,13 +649,18 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // of the block. if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } + if (DDT) + DDT->deleteEdge(Pred, BB); } /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT, + DeferredDominance *DDT) { + assert(!(DT && DDT) && "Cannot call with both DT and DDT."); + // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -621,6 +673,24 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); + bool ReplaceEntryBB = false; + if (PredBB == &DestBB->getParent()->getEntryBlock()) + ReplaceEntryBB = true; + + // Deferred DT update: Collect all the edges that enter PredBB. These + // dominator edges will be redirected to DestBB. + std::vector <DominatorTree::UpdateType> Updates; + if (DDT && !ReplaceEntryBB) { + Updates.reserve(1 + (2 * pred_size(PredBB))); + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); + for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, PredBB}); + // This predecessor of PredBB may already have DestBB as a successor. + if (llvm::find(successors(*I), DestBB) == succ_end(*I)) + Updates.push_back({DominatorTree::Insert, *I, DestBB}); + } + } + // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -641,7 +711,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If the PredBB is the entry block of the function, move DestBB up to // become the entry block after we erase PredBB. - if (PredBB == &DestBB->getParent()->getEntryBlock()) + if (ReplaceEntryBB) DestBB->moveAfter(PredBB); if (DT) { @@ -653,8 +723,19 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { DT->eraseNode(PredBB); } } - // Nuke BB. - PredBB->eraseFromParent(); + + if (DDT) { + DDT->deleteBB(PredBB); // Deferred deletion of BB. + if (ReplaceEntryBB) + // The entry block was removed and there is no external interface for the + // dominator tree to be notified of this change. In this corner-case we + // recalculate the entire tree. + DDT->recalculate(*(DestBB->getParent())); + else + DDT->applyUpdates(Updates); + } else { + PredBB->eraseFromParent(); // Nuke BB. 
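The DeferredDominance plumbing above batches CFG updates instead of mutating the DominatorTree eagerly, and lets a transform delete blocks lazily via deleteBB. A hedged client-side sketch (rerouteEdges is an illustrative name; DeferredDominance is assumed to come from llvm/IR/Dominators.h in this tree):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Dominators.h"
    #include <vector>
    using namespace llvm;

    // Record one Delete/Insert pair per successor while rewriting the CFG,
    // then hand the whole batch to the deferred updater at once.
    static void rerouteEdges(DeferredDominance &DDT, BasicBlock *From,
                             ArrayRef<BasicBlock *> Succs, BasicBlock *To) {
      std::vector<DominatorTree::UpdateType> Updates;
      Updates.reserve(2 * Succs.size());
      for (BasicBlock *S : Succs) {
        Updates.push_back({DominatorTree::Delete, From, S});
        Updates.push_back({DominatorTree::Insert, To, S});
      }
      DDT.applyUpdates(Updates); // Deduplicated and applied lazily.
    }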
+ } } /// CanMergeValues - Return true if we can choose one of these values to use @@ -671,8 +752,8 @@ static bool CanMergeValues(Value *First, Value *Second) { static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " - << Succ->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe if (Succ->getSinglePredecessor()) return true; @@ -695,10 +776,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { if (BBPreds.count(IBB) && !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), PN->getIncomingValue(PI))) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with " - << BBPN->getName() << " with regard to common predecessor " - << IBB->getName() << "\n"); + LLVM_DEBUG(dbgs() + << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << IBB->getName() << "\n"); return false; } } @@ -711,9 +793,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { BasicBlock *IBB = PN->getIncomingBlock(PI); if (BBPreds.count(IBB) && !CanMergeValues(Val, PN->getIncomingValue(PI))) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << IBB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() + << " in " << Succ->getName() + << " is conflicting with regard to common " + << "predecessor " << IBB->getName() << "\n"); return false; } } @@ -726,7 +809,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { using PredBlockVector = SmallVector<BasicBlock *, 16>; using IncomingValueMap = DenseMap<BasicBlock *, Value *>; -/// \brief Determines the value to use as the phi node input for a block. +/// Determines the value to use as the phi node input for a block. /// /// Select between \p OldVal any value that we know flows from \p BB /// to a particular phi on the basis of which one (if either) is not @@ -755,7 +838,7 @@ static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, return OldVal; } -/// \brief Create a map from block to value for the operands of a +/// Create a map from block to value for the operands of a /// given phi. /// /// Create a map from block to value for each non-undef value flowing @@ -774,7 +857,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN, } } -/// \brief Replace the incoming undef values to a phi with the values +/// Replace the incoming undef values to a phi with the values /// from a block-to-value map. /// /// \param PN The phi we are replacing the undefs in. @@ -794,7 +877,7 @@ static void replaceUndefValuesInPhi(PHINode *PN, } } -/// \brief Replace a value flowing from a block to a phi with +/// Replace a value flowing from a block to a phi with /// potentially multiple instances of that value flowing from the /// block's predecessors to the phi. /// @@ -861,7 +944,8 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, /// potential side-effect free intrinsics and the branch. 
If possible, /// eliminate BB by rewriting all the predecessors to branch to the successor /// block and return true. If we can't transform, return false. -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DeferredDominance *DDT) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -900,7 +984,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } } - DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + std::vector<DominatorTree::UpdateType> Updates; + if (DDT) { + Updates.reserve(1 + (2 * pred_size(BB))); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + // All predecessors of BB will be moved to Succ. + for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, BB}); + // This predecessor of BB may already have Succ as a successor. + if (llvm::find(successors(*I), Succ) == succ_end(*I)) + Updates.push_back({DominatorTree::Insert, *I, Succ}); + } + } if (isa<PHINode>(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in @@ -946,7 +1043,13 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. + + if (DDT) { + DDT->deleteBB(BB); // Deferred deletion of the old basic block. + DDT->applyUpdates(Updates); + } else { + BB->eraseFromParent(); // Delete the old basic block. + } return true; } @@ -1125,6 +1228,31 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, return false; } +/// Check if the alloc size of \p ValTy is large enough to cover the variable +/// (or fragment of the variable) described by \p DII. +/// +/// This is primarily intended as a helper for the different +/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is +/// converted describes an alloca'd variable, so we need to use the +/// alloc size of the value when doing the comparison. E.g. an i1 value will be +/// identified as covering an n-bit fragment, if the store size of i1 is at +/// least n bits. +static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) { + const DataLayout &DL = DII->getModule()->getDataLayout(); + uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy); + if (auto FragmentSize = DII->getFragmentSizeInBits()) + return ValueSize >= *FragmentSize; + // We can't always calculate the size of the DI variable (e.g. if it is a + // VLA). Try to use the size of the alloca that the dbg intrinsic describes + // intead. + if (DII->isAddressOfVariable()) + if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation())) + if (auto FragmentSize = AI->getAllocationSizeInBits(DL)) + return ValueSize >= *FragmentSize; + // Could not determine size of variable. Conservatively return false. + return false; +} + /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. 
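valueCoversEntireFragment above gates every ConvertDebugDeclareToDebugValue overload that follows: a dbg.value may only replace a dbg.declare when the stored or loaded value is at least as wide as the described fragment. The core comparison is small; a hedged restatement (coversFragment is an illustrative name, using the same DataLayout query as the hunk):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Alloc size is deliberately used rather than store size: an i1 occupies
    // a full byte in memory, so it covers a fragment of up to 8 bits.
    static bool coversFragment(Type *ValTy, uint64_t FragmentSizeInBits,
                               const DataLayout &DL) {
      return DL.getTypeAllocSizeInBits(ValTy) >= FragmentSizeInBits;
    }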
void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, @@ -1135,6 +1263,21 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, auto *DIExpr = DII->getExpression(); Value *DV = SI->getOperand(0); + if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) { + // FIXME: If storing to a part of the variable described by the dbg.declare, + // then we want to insert a dbg.value for the corresponding fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + // For now, when there is a store to parts of the variable (but we do not + // know which part) we insert an dbg.value instrinsic to indicate that we + // know nothing about the variable's content. + DV = UndefValue::get(DV->getType()); + if (!LdStHasDebugValue(DIVar, DIExpr, SI)) + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), + SI); + return; + } + // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. Argument *ExtendedArg = nullptr; @@ -1178,6 +1321,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, if (LdStHasDebugValue(DIVar, DIExpr, LI)) return; + if (!valueCoversEntireFragment(LI->getType(), DII)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a dbg.value for the corresponding + // fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + return; + } + // We are now tracking the loaded value instead of the address. In the // future if multi-location support is added to the IR, it might be // preferable to keep tracking both the loaded value and the original @@ -1198,6 +1350,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, if (PhiHasDebugValue(DIVar, DIExpr, APN)) return; + if (!valueCoversEntireFragment(APN->getType(), DII)) { + // FIXME: If only referring to a part of the variable described by the + // dbg.declare, then we want to insert a dbg.value for the corresponding + // fragment. + LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " + << *DII << '\n'); + return; + } + BasicBlock *BB = APN->getParent(); auto InsertionPt = BB->getFirstInsertionPt(); @@ -1237,33 +1398,91 @@ bool llvm::LowerDbgDeclare(Function &F) { // stored on the stack, while the dbg.declare can only describe // the stack slot (and at a lexical-scope granularity). Later // passes will attempt to elide the stack slot. - if (AI && !isArray(AI)) { - for (auto &AIUse : AI->uses()) { - User *U = AIUse.getUser(); - if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (AIUse.getOperandNo() == 1) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - } else if (CallInst *CI = dyn_cast<CallInst>(U)) { - // This is a call by-value or some other instruction that - // takes a pointer to the variable. Insert a *value* - // intrinsic that describes the alloca. - DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), - DDI->getExpression(), DDI->getDebugLoc(), - CI); - } + if (!AI || isArray(AI)) + continue; + + // A volatile load/store means that the alloca can't be elided anyway. 
+ if (llvm::any_of(AI->users(), [](User *U) -> bool { + if (LoadInst *LI = dyn_cast<LoadInst>(U)) + return LI->isVolatile(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) + return SI->isVolatile(); + return false; + })) + continue; + + for (auto &AIUse : AI->uses()) { + User *U = AIUse.getUser(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AIUse.getOperandNo() == 1) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // This is a call by-value or some other instruction that takes a + // pointer to the variable. Insert a *value* intrinsic that describes + // the variable by dereferencing the alloca. + auto *DerefExpr = + DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); + DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, + DDI->getDebugLoc(), CI); } - DDI->eraseFromParent(); } + DDI->eraseFromParent(); } return true; } +/// Propagate dbg.value intrinsics through the newly inserted PHIs. +void llvm::insertDebugValuesForPHIs(BasicBlock *BB, + SmallVectorImpl<PHINode *> &InsertedPHIs) { + assert(BB && "No BasicBlock to clone dbg.value(s) from."); + if (InsertedPHIs.size() == 0) + return; + + // Map existing PHI nodes to their dbg.values. + ValueToValueMapTy DbgValueMap; + for (auto &I : *BB) { + if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) { + if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation())) + DbgValueMap.insert({Loc, DbgII}); + } + } + if (DbgValueMap.size() == 0) + return; + + // Then iterate through the new PHIs and look to see if they use one of the + // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will + // propagate the info through the new PHI. + LLVMContext &C = BB->getContext(); + for (auto PHI : InsertedPHIs) { + BasicBlock *Parent = PHI->getParent(); + // Avoid inserting an intrinsic into an EH block. + if (Parent->getFirstNonPHI()->isEHPad()) + continue; + auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI)); + for (auto VI : PHI->operand_values()) { + auto V = DbgValueMap.find(VI); + if (V != DbgValueMap.end()) { + auto *DbgII = cast<DbgInfoIntrinsic>(V->second); + Instruction *NewDbgII = DbgII->clone(); + NewDbgII->setOperand(0, PhiMAV); + auto InsertionPt = Parent->getFirstInsertionPt(); + assert(InsertionPt != Parent->end() && "Ill-formed basic block"); + NewDbgII->insertBefore(&*InsertionPt); + } + } + } +} + /// Finds all intrinsics declaring local variables as living in the memory that /// 'V' points to. This may include a mix of dbg.declare and /// dbg.addr intrinsics. TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) { + // This function is hot. Check whether the value has any metadata to avoid a + // DenseMap lookup. + if (!V->isUsedByMetadata()) + return {}; auto *L = LocalAsMetadata::getIfExists(V); if (!L) return {}; @@ -1282,6 +1501,10 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) { } void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { + // This function is hot. Check whether the value has any metadata to avoid a + // DenseMap lookup. 
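insertDebugValuesForPHIs above is the hook that keeps dbg.value coverage alive when new PHIs are introduced for an existing PHI value. A hedged usage sketch with SSAUpdater, a typical producer of such PHIs (rewritePhiUsesOutsideBlock is an illustrative name; only PHI-typed dbg.value locations are propagated by this helper):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    static void rewritePhiUsesOutsideBlock(PHINode *PN) {
      BasicBlock *BB = PN->getParent();
      SmallVector<PHINode *, 8> InsertedPHIs;
      SSAUpdater SSA(&InsertedPHIs);
      SSA.Initialize(PN->getType(), PN->getName());
      SSA.AddAvailableValue(BB, PN);
      SmallVector<Use *, 8> ToRewrite; // Collect first: RewriteUse mutates
      for (Use &U : PN->uses())        // the use list being walked.
        if (cast<Instruction>(U.getUser())->getParent() != BB)
          ToRewrite.push_back(&U);
      for (Use *U : ToRewrite)
        SSA.RewriteUse(*U);
      // Any PHIs SSAUpdater had to create now receive cloned dbg.values.
      insertDebugValuesForPHIs(BB, InsertedPHIs);
    }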
+ if (!V->isUsedByMetadata()) + return; if (auto *L = LocalAsMetadata::getIfExists(V)) if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) for (User *U : MDV->users()) @@ -1289,8 +1512,12 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, - Value *V) { +void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, + Value *V) { + // This function is hot. Check whether the value has any metadata to avoid a + // DenseMap lookup. + if (!V->isUsedByMetadata()) + return; if (auto *L = LocalAsMetadata::getIfExists(V)) if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) for (User *U : MDV->users()) @@ -1308,11 +1535,11 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); - // Insert llvm.dbg.declare immediately after InsertBefore, and remove old + // Insert llvm.dbg.declare immediately before InsertBefore, and remove old // llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); if (DII == InsertBefore) - InsertBefore = &*std::next(InsertBefore->getIterator()); + InsertBefore = InsertBefore->getNextNode(); DII->eraseFromParent(); } return !DbgAddrs.empty(); @@ -1364,66 +1591,293 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, } } -void llvm::salvageDebugInfo(Instruction &I) { - SmallVector<DbgValueInst *, 1> DbgValues; +/// Wrap \p V in a ValueAsMetadata instance. +static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) { + return MetadataAsValue::get(C, ValueAsMetadata::get(V)); +} + +bool llvm::salvageDebugInfo(Instruction &I) { + SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, &I); + if (DbgUsers.empty()) + return false; + auto &M = *I.getModule(); + auto &DL = M.getDataLayout(); + auto &Ctx = I.getContext(); + auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); }; - auto wrapMD = [&](Value *V) { - return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); + auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) { + auto *DIExpr = DII->getExpression(); + if (!Ops.empty()) { + // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they + // are implicitly pointing out the value as a DWARF memory location + // description. + bool WithStackValue = isa<DbgValueInst>(DII); + DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); + } + DII->setOperand(0, wrapMD(I.getOperand(0))); + DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); + LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); }; - auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { - auto *DIExpr = DVI->getExpression(); - DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, - DIExpression::NoDeref, - DIExpression::WithStackValue); - DVI->setOperand(0, wrapMD(I.getOperand(0))); - DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) { + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + doSalvage(DII, Ops); }; - if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) { - // Bitcasts are entirely irrelevant for debug info. 
Rewrite dbg.value, - // dbg.addr, and dbg.declare to use the cast's source. - SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; - findDbgUsers(DbgUsers, &I); + auto applyOps = [&](DbgInfoIntrinsic *DII, + std::initializer_list<uint64_t> Opcodes) { + SmallVector<uint64_t, 8> Ops(Opcodes); + doSalvage(DII, Ops); + }; + + if (auto *CI = dyn_cast<CastInst>(&I)) { + if (!CI->isNoopCast(DL)) + return false; + + // No-op casts are irrelevant for debug info. + MetadataAsValue *CastSrc = wrapMD(I.getOperand(0)); for (auto *DII : DbgUsers) { - DII->setOperand(0, wrapMD(I.getOperand(0))); - DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); + DII->setOperand(0, CastSrc); + LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); } + return true; } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) { - unsigned BitWidth = - M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); - APInt Offset(BitWidth, 0); - // Rewrite a constant GEP into a DIExpression. Since we are performing - // arithmetic to compute the variable's *value* in the DIExpression, we - // need to mark the expression with a DW_OP_stack_value. - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) - // GEP offsets are i32 and thus always fit into an int64_t. - applyOffset(DVI, Offset.getSExtValue()); - } + unsigned BitWidth = + M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. + APInt Offset(BitWidth, 0); + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) + for (auto *DII : DbgUsers) + applyOffset(DII, Offset.getSExtValue()); + return true; } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { - if (BI->getOpcode() == Instruction::Add) - if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1))) - if (ConstInt->getBitWidth() <= 64) { - APInt Offset = ConstInt->getValue(); - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) - applyOffset(DVI, Offset.getSExtValue()); - } + // Rewrite binary operations with constant integer operands. + auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)); + if (!ConstInt || ConstInt->getBitWidth() > 64) + return false; + + uint64_t Val = ConstInt->getSExtValue(); + for (auto *DII : DbgUsers) { + switch (BI->getOpcode()) { + case Instruction::Add: + applyOffset(DII, Val); + break; + case Instruction::Sub: + applyOffset(DII, -int64_t(Val)); + break; + case Instruction::Mul: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); + break; + case Instruction::SDiv: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); + break; + case Instruction::SRem: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); + break; + case Instruction::Or: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); + break; + case Instruction::And: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); + break; + case Instruction::Xor: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); + break; + case Instruction::Shl: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); + break; + case Instruction::LShr: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr}); + break; + case Instruction::AShr: + applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); + break; + default: + // TODO: Salvage constants from each kind of binop we know about. 
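+ // (The handled cases rewrite the surviving operand's expression; e.g.
+ // when `%y = mul i64 %x, 8` is erased, `dbg.value(%y, !var, !DIExpression())`
+ // becomes `dbg.value(%x, !var, !DIExpression(DW_OP_constu, 8, DW_OP_mul,
+ // DW_OP_stack_value))`, so the debugger recomputes the lost value from %x.)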
+ return false;
+ }
+ }
+ return true; } else if (isa<LoadInst>(&I)) {
- findDbgValues(DbgValues, &I);
- for (auto *DVI : DbgValues) {
+ MetadataAsValue *AddrMD = wrapMD(I.getOperand(0));
+ for (auto *DII : DbgUsers) { // Rewrite the load into DW_OP_deref.
- auto *DIExpr = DVI->getExpression();
+ auto *DIExpr = DII->getExpression(); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
- DVI->setOperand(0, wrapMD(I.getOperand(0)));
- DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr));
- DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ DII->setOperand(0, AddrMD);
+ DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ }
+ return true;
+ }
+ return false;
+}
+
+/// A replacement for a dbg.value expression.
+using DbgValReplacement = Optional<DIExpression *>;
+
+/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
+/// possibly moving/deleting users to prevent use-before-def. Returns true if
+/// changes are made.
+static bool rewriteDebugUsers(
+    Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
+    function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) {
+ // Find debug users of From.
+ SmallVector<DbgInfoIntrinsic *, 1> Users;
+ findDbgUsers(Users, &From);
+ if (Users.empty())
+ return false;
+
+ // Prevent use-before-def of To.
+ bool Changed = false;
+ SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage;
+ if (isa<Instruction>(&To)) {
+ bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
+
+ for (auto *DII : Users) {
+ // It's common to see a debug user between From and DomPoint. Move it
+ // after DomPoint to preserve the variable update without any reordering.
+ if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) {
+ LLVM_DEBUG(dbgs() << "MOVE: " << *DII << '\n');
+ DII->moveAfter(&DomPoint);
+ Changed = true;
+
+ // Users which otherwise aren't dominated by the replacement value must
+ // be salvaged or deleted.
+ } else if (!DT.dominates(&DomPoint, DII)) {
+ DeleteOrSalvage.insert(DII);
+ }
 }
 }
+
+ // Update debug users without use-before-def risk.
+ for (auto *DII : Users) {
+ if (DeleteOrSalvage.count(DII))
+ continue;
+
+ LLVMContext &Ctx = DII->getContext();
+ DbgValReplacement DVR = RewriteExpr(*DII);
+ if (!DVR)
+ continue;
+
+ DII->setOperand(0, wrapValueInMetadata(Ctx, &To));
+ DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR));
+ LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
+ Changed = true;
+ }
+
+ if (!DeleteOrSalvage.empty()) {
+ // Try to salvage the remaining debug users.
+ Changed |= salvageDebugInfo(From);
+
+ // Delete the debug users which weren't salvaged.
+ for (auto *DII : DeleteOrSalvage) {
+ if (DII->getVariableLocation() == &From) {
+ LLVM_DEBUG(dbgs() << "Erased UseBeforeDef: " << *DII << '\n');
+ DII->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// Check if a bitcast from a value of type \p FromTy to type \p ToTy would
+/// losslessly preserve the bits and semantics of the value. This predicate is
+/// symmetric, i.e. swapping \p FromTy and \p ToTy should give the same result.
+///
+/// Note that Type::canLosslesslyBitCastTo is not suitable here because it
+/// allows semantically inequivalent bitcasts, such as <2 x i64> -> <4 x i32>,
+/// and also does not allow lossless pointer <-> integer conversions.
+static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy,
+                                         Type *ToTy) {
+ // Trivially compatible types.
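+ // (Replacing one value with another of the very same type cannot change
+ // how a debugger interprets the bits, so no further checks are needed.)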
+ if (FromTy == ToTy) + return true; + + // Handle compatible pointer <-> integer conversions. + if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) { + bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy); + bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) && + !DL.isNonIntegralPointerType(ToTy); + return SameSize && LosslessConversion; + } + + // TODO: This is not exhaustive. + return false; +} + +bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, + Instruction &DomPoint, DominatorTree &DT) { + // Exit early if From has no debug users. + if (!From.isUsedByMetadata()) + return false; + + assert(&From != &To && "Can't replace something with itself"); + + Type *FromTy = From.getType(); + Type *ToTy = To.getType(); + + auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement { + return DII.getExpression(); + }; + + // Handle no-op conversions. + Module &M = *From.getModule(); + const DataLayout &DL = M.getDataLayout(); + if (isBitCastSemanticsPreserving(DL, FromTy, ToTy)) + return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + + // Handle integer-to-integer widening and narrowing. + // FIXME: Use DW_OP_convert when it's available everywhere. + if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) { + uint64_t FromBits = FromTy->getPrimitiveSizeInBits(); + uint64_t ToBits = ToTy->getPrimitiveSizeInBits(); + assert(FromBits != ToBits && "Unexpected no-op conversion"); + + // When the width of the result grows, assume that a debugger will only + // access the low `FromBits` bits when inspecting the source variable. + if (FromBits < ToBits) + return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + + // The width of the result has shrunk. Use sign/zero extension to describe + // the source variable's high bits. + auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement { + DILocalVariable *Var = DII.getVariable(); + + // Without knowing signedness, sign/zero extension isn't possible. + auto Signedness = Var->getSignedness(); + if (!Signedness) + return None; + + bool Signed = *Signedness == DIBasicType::Signedness::Signed; + + if (!Signed) { + // In the unsigned case, assume that a debugger will initialize the + // high bits to 0 and do a no-op conversion. + return Identity(DII); + } else { + // In the signed case, the high bits are given by sign extension, i.e: + // (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1) + // Calculate the high bits and OR them together with the low bits. + SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu, + (ToBits - 1), dwarf::DW_OP_shr, + dwarf::DW_OP_lit0, dwarf::DW_OP_not, + dwarf::DW_OP_mul, dwarf::DW_OP_or}); + return DIExpression::appendToStack(DII.getExpression(), Ops); + } + }; + return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); + } + + // TODO: Floating-point conversions, vectors. + return false; } unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { @@ -1448,13 +1902,19 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA) { + bool PreserveLCSSA, DeferredDominance *DDT) { BasicBlock *BB = I->getParent(); + std::vector <DominatorTree::UpdateType> Updates; + // Loop over all of the successors, removing BB's entry from any PHI // nodes. 
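+ // (When a DeferredDominance is supplied, the deleted edges are collected
+ // in Updates as well, so the dominator tree can be brought up to date in
+ // one batch after the CFG has stopped changing.)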
- for (BasicBlock *Successor : successors(BB)) + if (DDT) + Updates.reserve(BB->getTerminator()->getNumSuccessors()); + for (BasicBlock *Successor : successors(BB)) { Successor->removePredecessor(BB, PreserveLCSSA); - + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. if (UseLLVMTrap) { @@ -1474,11 +1934,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } + if (DDT) + DDT->applyUpdates(Updates); return NumInstrsRemoved; } /// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { +static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) { SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); SmallVector<OperandBundleDef, 1> OpBundles; II->getOperandBundlesAsDefs(OpBundles); @@ -1491,11 +1953,16 @@ static void changeToCall(InvokeInst *II) { II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); + BasicBlock *NormalDestBB = II->getNormalDest(); + BranchInst::Create(NormalDestBB, II); // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *BB = II->getParent(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -1536,7 +2003,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, } static bool markAliveBlocks(Function &F, - SmallPtrSetImpl<BasicBlock*> &Reachable) { + SmallPtrSetImpl<BasicBlock*> &Reachable, + DeferredDominance *DDT = nullptr) { SmallVector<BasicBlock*, 128> Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1549,41 +2017,44 @@ static bool markAliveBlocks(Function &F, // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. for (Instruction &I : *BB) { - // Assumptions that are known to be false are equivalent to unreachable. - // Also, if the condition is undefined, then we make the choice most - // beneficial to the optimizer, and choose that to also be unreachable. - if (auto *II = dyn_cast<IntrinsicInst>(&I)) { - if (II->getIntrinsicID() == Intrinsic::assume) { - if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(II, false); - Changed = true; - break; - } - } - - if (II->getIntrinsicID() == Intrinsic::experimental_guard) { - // A call to the guard intrinsic bails out of the current compilation - // unit if the predicate passed to it is false. If the predicate is a - // constant false, then we know the guard will bail out of the current - // compile unconditionally, so all code following it is dead. - // - // Note: unlike in llvm.assume, it is not "obviously profitable" for - // guards to treat `undef` as `false` since a guard on `undef` can - // still be useful for widening. 
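+ // For example (illustrative IR), a guard such as
+ //   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+ // always bails out to the deoptimized code, so everything after it in the
+ // block is dead.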
- if (match(II->getArgOperand(0), m_Zero())) - if (!isa<UnreachableInst>(II->getNextNode())) { - changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); + if (auto *CI = dyn_cast<CallInst>(&I)) { + Value *Callee = CI->getCalledValue(); + // Handle intrinsic calls. + if (Function *F = dyn_cast<Function>(Callee)) { + auto IntrinsicID = F->getIntrinsicID(); + // Assumptions that are known to be false are equivalent to + // unreachable. Also, if the condition is undefined, then we make the + // choice most beneficial to the optimizer, and choose that to also be + // unreachable. + if (IntrinsicID == Intrinsic::assume) { + if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(CI, false, false, DDT); Changed = true; break; } - } - } - - if (auto *CI = dyn_cast<CallInst>(&I)) { - Value *Callee = CI->getCalledValue(); - if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - changeToUnreachable(CI, /*UseLLVMTrap=*/false); + } else if (IntrinsicID == Intrinsic::experimental_guard) { + // A call to the guard intrinsic bails out of the current + // compilation unit if the predicate passed to it is false. If the + // predicate is a constant false, then we know the guard will bail + // out of the current compile unconditionally, so all code following + // it is dead. + // + // Note: unlike in llvm.assume, it is not "obviously profitable" for + // guards to treat `undef` as `false` since a guard on `undef` can + // still be useful for widening. + if (match(CI->getArgOperand(0), m_Zero())) + if (!isa<UnreachableInst>(CI->getNextNode())) { + changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false, + false, DDT); + Changed = true; + break; + } + } + } else if ((isa<ConstantPointerNull>(Callee) && + !NullPointerIsDefined(CI->getFunction())) || + isa<UndefValue>(Callee)) { + changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT); Changed = true; break; } @@ -1593,17 +2064,16 @@ static bool markAliveBlocks(Function &F, // though. if (!isa<UnreachableInst>(CI->getNextNode())) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI->getNextNode(), false); + changeToUnreachable(CI->getNextNode(), false, false, DDT); Changed = true; } break; } - } + } else if (auto *SI = dyn_cast<StoreInst>(&I)) { + // Store to undef and store to null are undefined and used to signal + // that they should be changed to unreachable by passes that can't + // modify the CFG. - // Store to undef and store to null are undefined and used to signal that - // they should be changed to unreachable by passes that can't modify the - // CFG. - if (auto *SI = dyn_cast<StoreInst>(&I)) { // Don't touch volatile stores. if (SI->isVolatile()) continue; @@ -1611,8 +2081,9 @@ static bool markAliveBlocks(Function &F, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && - SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); + !NullPointerIsDefined(SI->getFunction(), + SI->getPointerAddressSpace()))) { + changeToUnreachable(SI, true, false, DDT); Changed = true; break; } @@ -1623,17 +2094,23 @@ static bool markAliveBlocks(Function &F, if (auto *II = dyn_cast<InvokeInst>(Terminator)) { // Turn invokes that call 'nounwind' functions into ordinary calls. 
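 // (An invoke whose callee can never unwind has a dead unwind edge;
 // rewriting it as a plain call lets the unwind destination become
 // unreachable and be cleaned up later.)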
Value *Callee = II->getCalledValue(); - if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - changeToUnreachable(II, true); + if ((isa<ConstantPointerNull>(Callee) && + !NullPointerIsDefined(BB->getParent())) || + isa<UndefValue>(Callee)) { + changeToUnreachable(II, true, false, DDT); Changed = true; } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. - BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *NormalDestBB = II->getNormalDest(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + BranchInst::Create(NormalDestBB, II); + UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } else - changeToCall(II); + changeToCall(II, DDT); Changed = true; } } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { @@ -1679,7 +2156,7 @@ static bool markAliveBlocks(Function &F, } } - Changed |= ConstantFoldTerminator(BB, true); + Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT); for (BasicBlock *Successor : successors(BB)) if (Reachable.insert(Successor).second) Worklist.push_back(Successor); @@ -1687,11 +2164,11 @@ static bool markAliveBlocks(Function &F, return Changed; } -void llvm::removeUnwindEdge(BasicBlock *BB) { +void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) { TerminatorInst *TI = BB->getTerminator(); if (auto *II = dyn_cast<InvokeInst>(TI)) { - changeToCall(II); + changeToCall(II, DDT); return; } @@ -1719,15 +2196,18 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { UnwindDest->removePredecessor(BB); TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDest); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. If `LVI` is passed, this function preserves LazyValueInfo /// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, + DeferredDominance *DDT) { SmallPtrSet<BasicBlock*, 16> Reachable; - bool Changed = markAliveBlocks(F, Reachable); + bool Changed = markAliveBlocks(F, Reachable, DDT); // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) @@ -1737,25 +2217,39 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { NumRemoved += F.size()-Reachable.size(); // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... - for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(&*BB)) + // their internal references. Update DDT and LVI if available. 
+ std::vector <DominatorTree::UpdateType> Updates; + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { + auto *BB = &*I; + if (Reachable.count(BB)) continue; - - for (BasicBlock *Successor : successors(&*BB)) + for (BasicBlock *Successor : successors(BB)) { if (Reachable.count(Successor)) - Successor->removePredecessor(&*BB); + Successor->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } if (LVI) - LVI->eraseBlock(&*BB); + LVI->eraseBlock(BB); BB->dropAllReferences(); } - for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(&*I)) - I = F.getBasicBlockList().erase(I); - else + for (Function::iterator I = ++F.begin(); I != F.end();) { + auto *BB = &*I; + if (Reachable.count(BB)) { ++I; + continue; + } + if (DDT) { + DDT->deleteBB(BB); // deferred deletion of BB. + ++I; + } else { + I = F.getBasicBlockList().erase(I); + } + } + if (DDT) + DDT->applyUpdates(Updates); return true; } @@ -1848,8 +2342,8 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To, if (!Dominates(Root, U)) continue; U.set(To); - DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " - << *To << " in " << *U << "\n"); + LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName() + << "' as " << *To << " in " << *U << "\n"); ++Count; } return Count; @@ -1953,7 +2447,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, if (!NewTy->isPointerTy()) return; - unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy); if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { MDNode *NN = MDNode::get(OldLI.getContext(), None); NewLI.setMetadata(LLVMContext::MD_nonnull, NN); @@ -2265,7 +2759,7 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { // Static allocas (constant size in the entry block) are handled by // prologue/epilogue insertion so they're free anyway. We definitely don't // want to make them non-constant. - return !dyn_cast<AllocaInst>(I)->isStaticAlloca(); + return !cast<AllocaInst>(I)->isStaticAlloca(); case Instruction::GetElementPtr: if (OpIdx == 0) return true; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp new file mode 100644 index 000000000000..6e92e679f999 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -0,0 +1,645 @@ +//===----------------- LoopRotationUtils.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides utilities to convert a loop into a loop with bottom test. 
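+// Rotation transforms a loop that tests its condition in the header into one
+// that tests it in the latch: conceptually, `while (c) { body; }` becomes
+// `if (c) { do { body; } while (c); }`, with the original header cloned into
+// the preheader to act as the entry guard.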
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-rotate"
+
+STATISTIC(NumRotated, "Number of loops rotated");
+
+namespace {
+/// A simple loop rotation transformation.
+class LoopRotate {
+ const unsigned MaxHeaderSize;
+ LoopInfo *LI;
+ const TargetTransformInfo *TTI;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ const SimplifyQuery &SQ;
+ bool RotationOnly;
+ bool IsUtilMode;
+
+public:
+ LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
+            const TargetTransformInfo *TTI, AssumptionCache *AC,
+            DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ,
+            bool RotationOnly, bool IsUtilMode)
+     : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
+       SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {}
+ bool processLoop(Loop *L);
+
+private:
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
+ bool simplifyLoopLatch(Loop *L);
+};
+} // end anonymous namespace
+
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instructions that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+                                            BasicBlock *OrigPreheader,
+                                            ValueToValueMapTy &ValueMap,
+                                SmallVectorImpl<PHINode*> *InsertedPHIs) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA(InsertedPHIs);
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = &*I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
+
+ // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end();
+ UI != UE;) {
+ // Grab the use before incrementing the iterator.
+ Use &U = *UI;
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+
+ // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
+ // intrinsics.
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ llvm::findDbgValues(DbgValues, OrigHeaderVal);
+ for (auto &DbgValue : DbgValues) {
+ // The original users in the OrigHeader are already using the original
+ // definitions.
+ BasicBlock *UserBB = DbgValue->getParent();
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped and anything else can be handled by
+ // the SSAUpdater. To avoid adding PHINodes, check if the value is
+ // available in UserBB; if not, substitute undef.
+ Value *NewVal;
+ if (UserBB == OrigPreheader)
+ NewVal = OrigPreHeaderVal;
+ else if (SSA.HasValueForBlock(UserBB))
+ NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
+ else
+ NewVal = UndefValue::get(OrigHeaderVal->getType());
+ DbgValue->setOperand(0,
+ MetadataAsValue::get(OrigHeaderVal->getContext(),
+ ValueAsMetadata::get(NewVal)));
+ }
+ }
+}
+
+// Look for a phi which is only used outside the loop (via an LCSSA phi)
+// in the exit from the header. This means that rotating the loop can
+// remove the phi.
+static bool shouldRotateLoopExitingLatch(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
+ if (L->contains(HeaderExit))
+ HeaderExit = Header->getTerminator()->getSuccessor(1);
+
+ for (auto &Phi : Header->phis()) {
+ // Look for uses of this phi in the loop/via exits other than the header.
+ if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
+ return cast<Instruction>(U)->getParent() != HeaderExit;
+ }))
+ continue;
+ return true;
+ }
+
+ return false;
+}
+
+/// Rotate loop LP. Return true if the loop is rotated.
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
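+///
+/// For example, rotating a loop whose header tests `i < n` clones that test
+/// into the preheader, where it guards entry, and leaves the loop exiting
+/// from the bottom as a do-while.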
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { + // If the loop has only one block then there is not much to rotate. + if (L->getBlocks().size() == 1) + return false; + + BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); + + BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); + if (!BI || BI->isUnconditional()) + return false; + + // If the loop header is not one of the loop exiting blocks then + // either this loop is already rotated or it is not + // suitable for loop rotation transformations. + if (!L->isLoopExiting(OrigHeader)) + return false; + + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (!OrigLatch) + return false; + + // Rotate if either the loop latch does *not* exit the loop, or if the loop + // latch was just simplified. Or if we think it will be profitable. + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && + !shouldRotateLoopExitingLatch(L)) + return false; + + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. + { + SmallPtrSet<const Value *, 32> EphValues; + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + + CodeMetrics Metrics; + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); + if (Metrics.notDuplicatable) { + LLVM_DEBUG( + dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" + << " instructions: "; + L->dump()); + return false; + } + if (Metrics.convergent) { + LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " + "instructions: "; + L->dump()); + return false; + } + if (Metrics.NumInsts > MaxHeaderSize) + return false; + } + + // Now, this loop is suitable for rotation. + BasicBlock *OrigPreheader = L->getLoopPreheader(); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (!OrigPreheader || !L->hasDedicatedExits()) + return false; + + // Anything ScalarEvolution may know about this loop or the PHI nodes + // in its header will soon be invalidated. We should also invalidate + // all outer loops because insertion and deletion of blocks that happens + // during the rotation may violate invariants related to backedge taken + // infos in them. + if (SE) + SE->forgetTopmostLoop(L); + + LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + + // Find new Loop header. NewHeader is a Header's one and only successor + // that is inside loop. Header's other successor is outside the + // loop. Otherwise loop is not suitable for rotation. + BasicBlock *Exit = BI->getSuccessor(0); + BasicBlock *NewHeader = BI->getSuccessor(1); + if (L->contains(Exit)) + std::swap(Exit, NewHeader); + assert(NewHeader && "Unable to determine new loop header"); + assert(L->contains(NewHeader) && !L->contains(Exit) && + "Unable to determine loop header and exit blocks"); + + // This code assumes that the new header has exactly one predecessor. + // Remove any single-entry PHI nodes in it. + assert(NewHeader->getSinglePredecessor() && + "New header doesn't have one pred!"); + FoldSingleEntryPHINodes(NewHeader); + + // Begin by walking OrigHeader and populating ValueMap with an entry for + // each Instruction. + BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); + ValueToValueMapTy ValueMap; + + // For PHI nodes, the value available in OldPreHeader is just the + // incoming value from OldPreHeader. 
+ for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+
+ // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
+ using DbgIntrinsicHash =
+ std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
+ auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash {
+ return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
+ };
+ SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
+ for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
+ I != E; ++I) {
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I))
+ DbgIntrinsics.insert(makeHash(DII));
+ else
+ break;
+ }
+
+ while (I != E) {
+ Instruction *Inst = &*I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
+ !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Avoid inserting the same intrinsic twice.
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C))
+ if (DbgIntrinsics.count(makeHash(DII))) {
+ C->deleteValue();
+ continue;
+ }
+
+ // With the operands remapped, see if the instruction constant folds or is
+ // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C, SQ);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ ValueMap[Inst] = V;
+ if (!C->mayHaveSideEffects()) {
+ C->deleteValue();
+ C = nullptr;
+ }
+ } else {
+ ValueMap[Inst] = C;
+ }
+ if (C) {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(C))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
+
+ // Along with all the other instructions, we just cloned OrigHeader's
+ // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
+ // successors by duplicating their incoming values for OrigHeader.
+ TerminatorInst *TI = OrigHeader->getTerminator(); + for (BasicBlock *SuccBB : TI->successors()) + for (BasicBlock::iterator BI = SuccBB->begin(); + PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); + + // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove + // OrigPreHeader's old terminator (the original branch into the loop), and + // remove the corresponding incoming values from the PHI nodes in OrigHeader. + LoopEntryBranch->eraseFromParent(); + + + SmallVector<PHINode*, 2> InsertedPHIs; + // If there were any uses of instructions in the duplicated block outside the + // loop, update them, inserting PHI nodes as required + RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, + &InsertedPHIs); + + // Attach dbg.value intrinsics to the new phis if that phi uses a value that + // previously had debug metadata attached. This keeps the debug info + // up-to-date in the loop body. + if (!InsertedPHIs.empty()) + insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); + + // NewHeader is now the header of the loop. + L->moveToHeader(NewHeader); + assert(L->getHeader() == NewHeader && "Latch block is our new header"); + + // Inform DT about changes to the CFG. + if (DT) { + // The OrigPreheader branches to the NewHeader and Exit now. Then, inform + // the DT about the removed edge to the OrigHeader (that got removed). + SmallVector<DominatorTree::UpdateType, 3> Updates; + Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); + Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); + Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); + DT->applyUpdates(Updates); + } + + // At this point, we've finished our major CFG changes. As part of cloning + // the loop into the preheader we've simplified instructions and the + // duplicated conditional branch may now be branching on a constant. If it is + // branching on a constant and if that constant means that we enter the loop, + // then we fold away the cond branch to an uncond branch. This simplifies the + // loop in cases important for nested loops, and it also means we don't have + // to split as many edges. + BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); + assert(PHBI->isConditional() && "Should be clone of BI condbr!"); + if (!isa<ConstantInt>(PHBI->getCondition()) || + PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != + NewHeader) { + // The conditional branch can't be folded, handle the general case. + // Split edges as necessary to preserve LoopSimplify form. + + // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and + // thus is not a preheader anymore. + // Split the edge to form a real preheader. + BasicBlock *NewPH = SplitCriticalEdge( + OrigPreheader, NewHeader, + CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); + NewPH->setName(NewHeader->getName() + ".lr.ph"); + + // Preserve canonical loop form, which means that 'Exit' should have only + // one predecessor. Note that Exit could be an exit block for multiple + // nested loops, causing both of the edges to now be critical and need to + // be split. + SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit)); + bool SplitLatchEdge = false; + for (BasicBlock *ExitPred : ExitPreds) { + // We only need to split loop exit edges. 
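+ // (A predecessor that is in no loop, or whose loop also contains Exit,
+ // does not contribute a loop-exit edge and is skipped below.)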
+ Loop *PredLoop = LI->getLoopFor(ExitPred); + if (!PredLoop || PredLoop->contains(Exit)) + continue; + if (isa<IndirectBrInst>(ExitPred->getTerminator())) + continue; + SplitLatchEdge |= L->getLoopLatch() == ExitPred; + BasicBlock *ExitSplit = SplitCriticalEdge( + ExitPred, Exit, + CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); + ExitSplit->moveBefore(Exit); + } + assert(SplitLatchEdge && + "Despite splitting all preds, failed to split latch exit?"); + } else { + // We can fold the conditional branch in the preheader, this makes things + // simpler. The first step is to remove the extra edge to the Exit block. + Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); + PHBI->eraseFromParent(); + + // With our CFG finalized, update DomTree if it is available. + if (DT) DT->deleteEdge(OrigPreheader, Exit); + } + + assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); + assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); + + // Now that the CFG and DomTree are in a consistent state again, try to merge + // the OrigHeader block into OrigLatch. This will succeed if they are + // connected by an unconditional branch. This is just a cleanup so the + // emitted code isn't too gross in this common case. + MergeBlockIntoPredecessor(OrigHeader, DT, LI); + + LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + + ++NumRotated; + return true; +} + +/// Determine whether the instructions in this range may be safely and cheaply +/// speculated. This is not an important enough situation to develop complex +/// heuristics. We handle a single arithmetic instruction along with any type +/// conversions. +static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, + BasicBlock::iterator End, Loop *L) { + bool seenIncrement = false; + bool MultiExitLoop = false; + + if (!L->getExitingBlock()) + MultiExitLoop = true; + + for (BasicBlock::iterator I = Begin; I != End; ++I) { + + if (!isSafeToSpeculativelyExecute(&*I)) + return false; + + if (isa<DbgInfoIntrinsic>(I)) + continue; + + switch (I->getOpcode()) { + default: + return false; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast<GEPOperator>(I)->hasAllConstantIndices()) + return false; + // fall-thru to increment case + LLVM_FALLTHROUGH; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: { + Value *IVOpnd = + !isa<Constant>(I->getOperand(0)) + ? I->getOperand(0) + : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; + if (!IVOpnd) + return false; + + // If increment operand is used outside of the loop, this speculation + // could cause extra live range interference. + if (MultiExitLoop) { + for (User *UseI : IVOpnd->users()) { + auto *UserInst = cast<Instruction>(UseI); + if (!L->contains(UserInst)) + return false; + } + } + + if (seenIncrement) + return false; + seenIncrement = true; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // ignore type conversions + break; + } + } + return true; +} + +/// Fold the loop tail into the loop exit by speculating the loop tail +/// instructions. Typically, this is a single post-increment. 
In the case of a +/// simple 2-block loop, hoisting the increment can be much better than +/// duplicating the entire loop header. In the case of loops with early exits, +/// rotation will not work anyway, but simplifyLoopLatch will put the loop in +/// canonical form so downstream passes can handle it. +/// +/// I don't believe this invalidates SCEV. +bool LoopRotate::simplifyLoopLatch(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || Latch->hasAddressTaken()) + return false; + + BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); + if (!Jmp || !Jmp->isUnconditional()) + return false; + + BasicBlock *LastExit = Latch->getSinglePredecessor(); + if (!LastExit || !L->isLoopExiting(LastExit)) + return false; + + BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); + if (!BI) + return false; + + if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) + return false; + + LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " + << LastExit->getName() << "\n"); + + // Hoist the instructions from Latch into LastExit. + LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), + Latch->begin(), Jmp->getIterator()); + + unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; + BasicBlock *Header = Jmp->getSuccessor(0); + assert(Header == L->getHeader() && "expected a backward branch"); + + // Remove Latch from the CFG so that LastExit becomes the new Latch. + BI->setSuccessor(FallThruPath, Header); + Latch->replaceSuccessorsPhiUsesWith(LastExit); + Jmp->eraseFromParent(); + + // Nuke the Latch block. + assert(Latch->empty() && "unable to evacuate Latch"); + LI->removeBlock(Latch); + if (DT) + DT->eraseNode(Latch); + Latch->eraseFromParent(); + return true; +} + +/// Rotate \c L, and return true if any modification was made. +bool LoopRotate::processLoop(Loop *L) { + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + + bool SimplifiedLatch = false; + + // Simplify the loop latch before attempting to rotate the header + // upward. Rotation may not be needed if the loop tail can be folded into the + // loop exit. + if (!RotationOnly) + SimplifiedLatch = simplifyLoopLatch(L); + + bool MadeChange = rotateLoop(L, SimplifiedLatch); + assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && + "Loop latch should be exiting after loop-rotate."); + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + + return MadeChange || SimplifiedLatch; +} + + +/// The utility to convert a loop into a loop with bottom test. 
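+///
+/// This entry point defaults to IsUtilMode, which bypasses the latch-exit
+/// profitability heuristic, and to an unlimited header-size threshold, so a
+/// rotation is performed whenever it is legal.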
+bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, + AssumptionCache *AC, DominatorTree *DT, + ScalarEvolution *SE, const SimplifyQuery &SQ, + bool RotationOnly = true, + unsigned Threshold = unsigned(-1), + bool IsUtilMode = true) { + LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode); + + return LR.processLoop(L); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index f43af9772771..970494eb4704 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -52,6 +52,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -64,9 +65,8 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -141,8 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, if (!PreheaderBB) return nullptr; - DEBUG(dbgs() << "LoopSimplify: Creating pre-header " - << PreheaderBB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -170,7 +170,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, } while (!Worklist.empty()); } -/// \brief The first part of loop-nestification is to find a PHI node that tells +/// The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, AssumptionCache *AC) { @@ -195,7 +195,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, return nullptr; } -/// \brief If this loop has multiple backedges, try to pull one of them out into +/// If this loop has multiple backedges, try to pull one of them out into /// a nested loop. /// /// This is important for code that looks like @@ -242,7 +242,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } } - DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in // this loop, tell it to forget them, because we're about to @@ -332,7 +332,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, return NewOuter; } -/// \brief This method is called when the specified loop has more than one +/// This method is called when the specified loop has more than one /// backedge in it. 
///
/// If this occurs, revector all of these backedges to target a new basic block @@ -371,8 +371,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
- << BEBlock->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
 // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); @@ -457,7 +457,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, return BEBlock; }
-/// \brief Simplify one loop and queue further loops for simplification.
+/// Simplify one loop and queue further loops for simplification.
 static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, @@ -484,8 +484,8 @@ ReprocessLoop: // Delete each unique out-of-loop (and thus dead) predecessor. for (BasicBlock *P : BadPreds) {
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << P->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << P->getName() << "\n");
 // Zap the dead pred's terminator and replace it with unreachable. TerminatorInst *TI = P->getTerminator(); @@ -504,16 +504,13 @@ ReprocessLoop: if (BI->isConditional()) { if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << ExitingBlock->getName() << "\n");
+ LLVM_DEBUG(dbgs()
+ << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << ExitingBlock->getName() << "\n");
 BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0))));
- // This may make the loop analyzable, force SCEV recomputation.
- if (SE)
- SE->forgetLoop(L);
-
 Changed = true; } } @@ -617,11 +614,8 @@ ReprocessLoop: // comparison and the branch. bool AllInvariant = true; bool AnyInvariant = false;
- for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
 Instruction *Inst = &*I++;
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
 if (Inst == CI) continue; if (!L->makeLoopInvariant(Inst, AnyInvariant, @@ -648,15 +642,8 @@ ReprocessLoop: // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it.
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
- << ExitingBlock->getName() << "\n");
-
- // Notify ScalarEvolution before deleting this block. Currently assume the
- // parent loop doesn't change (spliting edges doesn't count). If blocks,
- // CFG edges, or other values in the parent loop change, then we need call
- // to forgetLoop() for the parent instead.
- if (SE)
- SE->forgetLoop(L);
+ LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
 assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; @@ -679,6 +666,12 @@ ReprocessLoop: } }
+ // Changing exit conditions for blocks may affect exit counts of this loop and
+ // any of its parents, so we must invalidate the entire subtree if we've made
+ // any changes.
+ if (Changed && SE) + SE->forgetTopmostLoop(L); + return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 92dfb1c7204d..04b8c1417e0a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -33,7 +34,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -63,8 +63,7 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, /// Convert the instruction operands from referencing the current values into /// those specified by VMap. -static inline void remapInstruction(Instruction *I, - ValueToValueMapTy &VMap) { +void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); @@ -97,16 +96,10 @@ static inline void remapInstruction(Instruction *I, /// Folds a basic block into its predecessor if it only has one predecessor, and /// that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. If folding is -/// successful references to the containing loop must be removed from -/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have -/// references to the eliminated BB. The argument ForgottenLoops contains a set -/// of loops that have already been forgotten to prevent redundant, expensive -/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. -static BasicBlock * -foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, - SmallPtrSetImpl<Loop *> &ForgottenLoops, - DominatorTree *DT) { +/// The LoopInfo Analysis that is passed will be kept consistent. +BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, + ScalarEvolution *SE, + DominatorTree *DT) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -116,7 +109,8 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return nullptr; - DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); + LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into " + << OnlyPred->getName() << "\n"); // Resolve any PHI nodes at the start of the block. They are all // guaranteed to have exactly one entry if they exist, unless there are @@ -149,13 +143,6 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, DT->eraseNode(BB); } - // ScalarEvolution holds references to loop exit blocks. - if (SE) { - if (Loop *L = LI->getLoopFor(BB)) { - if (ForgottenLoops.insert(L).second) - SE->forgetLoop(L); - } - } LI->removeBlock(BB); // Inherit predecessor's name if it exists... 
@@ -265,6 +252,48 @@ static bool isEpilogProfitable(Loop *L) { return false; } +/// Perform some cleanup and simplifications on loops after unrolling. It is +/// useful to simplify the IV's in the new loop, as well as do a quick +/// simplify/dce pass of the instructions. +void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC) { + // Simplify any new induction variables in the partially unrolled loop. + if (SE && SimplifyIVs) { + SmallVector<WeakTrackingVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go. + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks(); + for (BasicBlock *BB : NewLoopBlocks) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *Inst = &*I++; + + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) + if (LI->replacementPreservesLCSSAForm(Inst, V)) + Inst->replaceAllUsesWith(V); + if (isInstructionTriviallyDead(Inst)) + BB->getInstList().erase(Inst); + } + } + + // TODO: after peeling or unrolling, previously loop variant conditions are + // likely to fold to constants, eagerly propagating those here will require + // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be + // appropriate. +} + /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, @@ -310,19 +339,19 @@ LoopUnrollResult llvm::UnrollLoop( BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); + LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return LoopUnrollResult::Unmodified; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { - DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); + LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return LoopUnrollResult::Unmodified; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { - DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); + LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return LoopUnrollResult::Unmodified; } @@ -335,8 +364,9 @@ LoopUnrollResult llvm::UnrollLoop( if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. 
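Returning to the simplifyLoopAfterUnroll helper factored out above: a hedged sketch of a typical call site (the variable names here are illustrative, not taken from the patch). IV simplification only pays off when a loop actually survives, so a caller would normally gate it on a partial unroll:

    // Sketch of a caller. After replicating the loop body, request IV
    // simplification plus the constant-propagation/DCE sweep in one call.
    bool LoopSurvives = !CompletelyUnroll;
    simplifyLoopAfterUnroll(L, /*SimplifyIVs=*/LoopSurvives && Count > 1,
                            LI, SE, DT, AC);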
- DEBUG(dbgs() << - " Can't unroll; loop not terminated by a conditional branch.\n"); + LLVM_DEBUG( + dbgs() + << " Can't unroll; loop not terminated by a conditional branch.\n"); return LoopUnrollResult::Unmodified; } @@ -345,22 +375,22 @@ LoopUnrollResult llvm::UnrollLoop( }; if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { - DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" - " exiting the loop can be unrolled\n"); + LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" + " exiting the loop can be unrolled\n"); return LoopUnrollResult::Unmodified; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. - DEBUG(dbgs() << - " Won't unroll loop: address of header block is taken.\n"); + LLVM_DEBUG( + dbgs() << " Won't unroll loop: address of header block is taken.\n"); return LoopUnrollResult::Unmodified; } if (TripCount != 0) - DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); + LLVM_DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) - DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); + LLVM_DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. @@ -369,7 +399,7 @@ LoopUnrollResult llvm::UnrollLoop( // Don't enter the unroll code if there is nothing to do. if (TripCount == 0 && Count < 2 && PeelCount == 0) { - DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); + LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); return LoopUnrollResult::Unmodified; } @@ -403,8 +433,9 @@ LoopUnrollResult llvm::UnrollLoop( "Did not expect runtime trip-count unrolling " "and peeling for the same loop"); + bool Peeled = false; if (PeelCount) { - bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); + Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); // Successful peeling may result in a change in the loop preheader/trip // counts. If we later unroll the loop, we want these to be updated. @@ -419,7 +450,7 @@ LoopUnrollResult llvm::UnrollLoop( // Loops containing convergent instructions must have a count that divides // their TripMultiple. - DEBUG( + LLVM_DEBUG( { bool HasConvergent = false; for (auto &BB : L->blocks()) @@ -442,18 +473,12 @@ LoopUnrollResult llvm::UnrollLoop( if (Force) RuntimeTripCount = false; else { - DEBUG( - dbgs() << "Wont unroll; remainder loop could not be generated" - "when assuming runtime trip count\n"); + LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " + "generated when assuming runtime trip count\n"); return LoopUnrollResult::Unmodified; } } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. - if (SE) - SE->forgetLoop(L); - // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { @@ -468,8 +493,8 @@ LoopUnrollResult llvm::UnrollLoop( using namespace ore; // Report the unrolling decision. 
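Worked numbers for the breakout bookkeeping above (hypothetical values, assuming the usual BreakoutTrip = TripCount % Count remainder computation in the elided part of the hunk):

    // TripCount = 10, Count = 4: the unrolled body holds four copies of
    // the original iteration, two full passes cover 8 iterations, and
    // the remaining 10 % 4 == 2 iterations leave through the exit test
    // kept live at copy number BreakoutTrip == 2.
    unsigned TripCount = 10, Count = 4;
    unsigned BreakoutTrip = TripCount % Count; // == 2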
if (CompletelyUnroll) { - DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << TripCount << "!\n"); + LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), @@ -478,8 +503,8 @@ LoopUnrollResult llvm::UnrollLoop( << NV("UnrollCount", TripCount) << " iterations"; }); } else if (PeelCount) { - DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << PeelCount << "!\n"); + LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() + << " with iteration count " << PeelCount << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), @@ -495,31 +520,42 @@ LoopUnrollResult llvm::UnrollLoop( << NV("UnrollCount", Count); }; - DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() - << " by " << Count); + LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " + << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { - DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); if (ORE) ORE->emit([&]() { return DiagBuilder() << " with a breakout at trip " << NV("BreakoutTrip", BreakoutTrip); }); } else if (TripMultiple != 1) { - DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); if (ORE) ORE->emit([&]() { return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) << " trips per branch"; }); } else if (RuntimeTripCount) { - DEBUG(dbgs() << " with run-time trip count"); + LLVM_DEBUG(dbgs() << " with run-time trip count"); if (ORE) ORE->emit( [&]() { return DiagBuilder() << " with run-time trip count"; }); } - DEBUG(dbgs() << "!\n"); + LLVM_DEBUG(dbgs() << "!\n"); } + // We are going to make changes to this loop. SCEV may be keeping cached info + // about it, in particular about backedge taken count. The changes we make + // are guaranteed to invalidate this information for our loop. It is tempting + // to only invalidate the loop being unrolled, but it is incorrect as long as + // all exiting branches from all inner loops have impact on the outer loops, + // and if something changes inside them then any of outer loops may also + // change. When we forget outermost loop, we also forget all contained loops + // and this is what we need here. + if (SE) + SE->forgetTopmostLoop(L); + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); @@ -577,14 +613,9 @@ LoopUnrollResult llvm::UnrollLoop( "Header should not be in a sub-loop"); // Tell LI about New. const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); - if (OldLoop) { + if (OldLoop) LoopsToSimplify.insert(NewLoops[OldLoop]); - // Forget the old loop, since its inputs may have changed. - if (SE) - SE->forgetLoop(OldLoop); - } - if (*BB == Header) // Loop over all of the PHI nodes in the block, changing them to use // the incoming values from the previous block. @@ -769,17 +800,15 @@ LoopUnrollResult llvm::UnrollLoop( } } - if (DT && UnrollVerifyDomtree) - DT->verifyDomTree(); + assert(!DT || !UnrollVerifyDomtree || + DT->verify(DominatorTree::VerificationLevel::Fast)); // Merge adjacent basic blocks, if possible. 
- SmallPtrSet<Loop *, 4> ForgottenLoops; for (BasicBlock *Latch : Latches) { BranchInst *Term = cast<BranchInst>(Latch->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = - foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) { + if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), @@ -789,40 +818,10 @@ LoopUnrollResult llvm::UnrollLoop( } } - // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll && Count > 1) { - SmallVector<WeakTrackingVH, 16> DeadInsts; - simplifyLoopIVs(L, SE, DT, LI, DeadInsts); - - // Aggressively clean up dead instructions that simplifyLoopIVs already - // identified. Any remaining should be cleaned up below. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } - - // At this point, the code is well formed. We now do a quick sweep over the - // inserted code, doing constant propagation and dead code elimination as we - // go. - const DataLayout &DL = Header->getModule()->getDataLayout(); - const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); - for (BasicBlock *BB : NewLoopBlocks) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = &*I++; - - if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) - if (LI->replacementPreservesLCSSAForm(Inst, V)) - Inst->replaceAllUsesWith(V); - if (isInstructionTriviallyDead(Inst)) - BB->getInstList().erase(Inst); - } - } - - // TODO: after peeling or unrolling, previously loop variant conditions are - // likely to fold to constants, eagerly propagating those here will require - // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be - // appropriate. + // At this point, the code is well formed. We now simplify the unrolled loop, + // doing constant propagation and dead code elimination as we go. + simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE, + DT, AC); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp new file mode 100644 index 000000000000..b919f73c3817 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -0,0 +1,785 @@ +//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements loop unroll and jam as a routine, much like +// LoopUnroll.cpp implements loop unroll. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/Utils/Local.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll-and-jam" + +STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed"); +STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops completely unroll and jammed"); + +typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet; + +// Partition blocks in an outer/inner loop pair into blocks before and after +// the loop. +static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop, + BasicBlockSet &ForeBlocks, + BasicBlockSet &SubLoopBlocks, + BasicBlockSet &AftBlocks, + DominatorTree *DT) { + BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); + SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end()); + + for (BasicBlock *BB : L->blocks()) { + if (!SubLoop->contains(BB)) { + if (DT->dominates(SubLoopLatch, BB)) + AftBlocks.insert(BB); + else + ForeBlocks.insert(BB); + } + } + + // Check that all blocks in ForeBlocks together dominate the subloop + // TODO: This might ideally be done better with dominator/post-dominator information. + BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader(); + for (BasicBlock *BB : ForeBlocks) { + if (BB == SubLoopPreHeader) + continue; + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!ForeBlocks.count(TI->getSuccessor(i))) + return false; + } + + return true; +} + +// Looks at the phi nodes in Header for values coming from Latch. For these +// instructions and all their operands it calls Visit, recursing through any +// operands that are defined in AftBlocks. Returns false if Visit returns +// false, otherwise returns true. This is used to process the instructions in +// the Aft blocks that need to be moved before the subloop. It is used in two +// places: first to check that the required set of instructions can be moved +// before the loop, then to collect the instructions to actually move in +// moveHeaderPhiOperandsToForeBlocks.
+template <typename T> +static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, + BasicBlockSet &AftBlocks, T Visit) { + SmallVector<Instruction *, 8> Worklist; + for (auto &Phi : Header->phis()) { + Value *V = Phi.getIncomingValueForBlock(Latch); + if (Instruction *I = dyn_cast<Instruction>(V)) + Worklist.push_back(I); + } + + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + Worklist.pop_back(); + if (!Visit(I)) + return false; + + if (AftBlocks.count(I->getParent())) + for (auto &U : I->operands()) + if (Instruction *II = dyn_cast<Instruction>(U)) + Worklist.push_back(II); + } + + return true; +} + +// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc. +static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, + BasicBlock *Latch, + Instruction *InsertLoc, + BasicBlockSet &AftBlocks) { + // We need to ensure we move the instructions in the correct order, + // starting with the earliest required instruction and moving forward. + std::vector<Instruction *> Visited; + processHeaderPhiOperands(Header, Latch, AftBlocks, + [&Visited, &AftBlocks](Instruction *I) { + if (AftBlocks.count(I->getParent())) + Visited.push_back(I); + return true; + }); + + // Move all instructions in program order to before the InsertLoc + BasicBlock *InsertLocBB = InsertLoc->getParent(); + for (Instruction *I : reverse(Visited)) { + if (I->getParent() != InsertLocBB) + I->moveBefore(InsertLoc); + } +} + +/* + This method performs Unroll and Jam. For a simple loop like: + for (i = ..) + Fore(i) + for (j = ..) + SubLoop(i, j) + Aft(i) + + Instead of doing normal inner or outer unrolling, we do: + for (i = .., i+=2) + Fore(i) + Fore(i+1) + for (j = ..) + SubLoop(i, j) + SubLoop(i+1, j) + Aft(i) + Aft(i+1) + + So the outer loop is essentially unrolled and then the inner loops are fused + ("jammed") together into a single loop. This can increase speed when there + are loads in SubLoop that are invariant to i, as they become shared between + the now jammed inner loops. + + We do this by splitting the blocks in the loop into Fore, Subloop and Aft. + Fore blocks are those before the inner loop, Aft are those after. Normal + Unroll code is used to copy each of these sets of blocks and the results are + combined together into the final form above. + + isSafeToUnrollAndJam should be used prior to calling this to make sure the + unrolling will be valid. Checking profitability is also advisable. +*/ +LoopUnrollResult +llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE) { + + // When we enter here we should have already checked that it is safe. + BasicBlock *Header = L->getHeader(); + assert(L->getSubLoops().size() == 1); + Loop *SubLoop = *L->begin(); + + // Don't enter the unroll code if there is nothing to do. + if (TripCount == 0 && Count < 2) { + LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); + return LoopUnrollResult::Unmodified; + } + + assert(Count > 0); + assert(TripMultiple > 0); + assert(TripCount == 0 || TripCount % TripMultiple == 0); + + // Are we eliminating the loop control altogether?
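To make the transformation described in the comment block above concrete, a hypothetical source-level view of the payoff (array names are invented; the remainder is ignored by assuming N is even):

    // Before unroll-and-jam with Count == 2:
    for (int i = 0; i < N; ++i)
      for (int j = 0; j < M; ++j)
        out[i] += A[i][j] * B[j];      // B[j] reloaded for every i

    // After: the two inner loop copies are jammed into one, so each
    // B[j] load is shared by both outer iterations.
    for (int i = 0; i < N; i += 2)
      for (int j = 0; j < M; ++j) {
        int b = B[j];                  // invariant to i: loaded once
        out[i]     += A[i][j]     * b;
        out[i + 1] += A[i + 1][j] * b;
      }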
+ bool CompletelyUnroll = (Count == TripCount); + + // We use the runtime remainder in cases where we don't know trip multiple + if (TripMultiple == 1 || TripMultiple % Count != 0) { + if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, + /*UseEpilogRemainder*/ true, + UnrollRemainder, LI, SE, DT, AC, true)) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " + "generated when assuming runtime trip count\n"); + return LoopUnrollResult::Unmodified; + } + } + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + if (SE) { + SE->forgetLoop(L); + SE->forgetLoop(SubLoop); + } + + using namespace ore; + // Report the unrolling decision. + if (CompletelyUnroll) { + LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %" + << Header->getName() << " with trip count " << TripCount + << "!\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unroll and jammed loop with " + << NV("UnrollCount", TripCount) << " iterations"); + } else { + auto DiagBuilder = [&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + return Diag << "unroll and jammed loop by a factor of " + << NV("UnrollCount", Count); + }; + + LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName() + << " by " << Count); + if (TripMultiple != 1) { + LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + ORE->emit([&]() { + return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"; + }); + } else { + LLVM_DEBUG(dbgs() << " with run-time trip count"); + ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; }); + } + LLVM_DEBUG(dbgs() << "!\n"); + } + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *LatchBlock = L->getLoopLatch(); + BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); + assert(Preheader && LatchBlock && Header); + assert(BI && !BI->isUnconditional()); + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); + BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + bool SubLoopContinueOnTrue = SubLoop->contains( + SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0)); + + // Partition blocks in an outer/inner loop pair into blocks before and after + // the loop + BasicBlockSet SubLoopBlocks; + BasicBlockSet ForeBlocks; + BasicBlockSet AftBlocks; + partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks, + DT); + + // We keep track of the entering/first and exiting/last block of each of + // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of + // blocks easier. + std::vector<BasicBlock *> ForeBlocksFirst; + std::vector<BasicBlock *> ForeBlocksLast; + std::vector<BasicBlock *> SubLoopBlocksFirst; + std::vector<BasicBlock *> SubLoopBlocksLast; + std::vector<BasicBlock *> AftBlocksFirst; + std::vector<BasicBlock *> AftBlocksLast; + ForeBlocksFirst.push_back(Header); + ForeBlocksLast.push_back(SubLoop->getLoopPreheader()); + SubLoopBlocksFirst.push_back(SubLoop->getHeader()); + SubLoopBlocksLast.push_back(SubLoop->getExitingBlock()); + AftBlocksFirst.push_back(SubLoop->getExitBlock()); + AftBlocksLast.push_back(L->getExitingBlock()); + // Maps Blocks[0] -> Blocks[It] + ValueToValueMapTy LastValueMap; + + // Move any instructions from fore phi operands from AftBlocks into Fore. 
+ moveHeaderPhiOperandsToForeBlocks( + Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(), + AftBlocks); + + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + + if (Header->getParent()->isDebugInfoForProfiling()) + for (BasicBlock *BB : L->getBlocks()) + for (Instruction &I : *BB) + if (!isa<DbgInfoIntrinsic>(&I)) + if (const DILocation *DIL = I.getDebugLoc()) + I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); + + // Copy all blocks + for (unsigned It = 1; It != Count; ++It) { + std::vector<BasicBlock *> NewBlocks; + // Maps Blocks[It] -> Blocks[It-1] + DenseMap<Value *, Value *> PrevItValueMap; + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); + Header->getParent()->getBasicBlockList().push_back(New); + + if (ForeBlocks.count(*BB)) { + L->addBasicBlockToLoop(New, *LI); + + if (*BB == ForeBlocksFirst[0]) + ForeBlocksFirst.push_back(New); + if (*BB == ForeBlocksLast[0]) + ForeBlocksLast.push_back(New); + } else if (SubLoopBlocks.count(*BB)) { + SubLoop->addBasicBlockToLoop(New, *LI); + + if (*BB == SubLoopBlocksFirst[0]) + SubLoopBlocksFirst.push_back(New); + if (*BB == SubLoopBlocksLast[0]) + SubLoopBlocksLast.push_back(New); + } else if (AftBlocks.count(*BB)) { + L->addBasicBlockToLoop(New, *LI); + + if (*BB == AftBlocksFirst[0]) + AftBlocksFirst.push_back(New); + if (*BB == AftBlocksLast[0]) + AftBlocksLast.push_back(New); + } else { + llvm_unreachable("BB being cloned should be in Fore/Sub/Aft"); + } + + // Update our running maps of newest clones + PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]); + LastValueMap[*BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) { + PrevItValueMap[VI->second] = + const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]); + LastValueMap[VI->first] = VI->second; + } + + NewBlocks.push_back(New); + + // Update DomTree: + if (*BB == ForeBlocksFirst[0]) + DT->addNewBlock(New, ForeBlocksLast[It - 1]); + else if (*BB == SubLoopBlocksFirst[0]) + DT->addNewBlock(New, SubLoopBlocksLast[It - 1]); + else if (*BB == AftBlocksFirst[0]) + DT->addNewBlock(New, AftBlocksLast[It - 1]); + else { + // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree + // structure. 
+ auto BBDomNode = DT->getNode(*BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + assert(OriginalBBIDom); + assert(LastValueMap[cast<Value>(OriginalBBIDom)]); + DT->addNewBlock( + New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); + } + } + + // Remap all instructions in the most recent iteration + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + ::remapInstruction(&I, LastValueMap); + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + } + } + + // Alter the ForeBlocks phi's, pointing them at the latest version of the + // value from the previous iteration's phis + for (PHINode &Phi : ForeBlocksFirst[It]->phis()) { + Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]); + assert(OldValue && "should have incoming edge from Aft[It]"); + Value *NewValue = OldValue; + if (Value *PrevValue = PrevItValueMap[OldValue]) + NewValue = PrevValue; + + assert(Phi.getNumOperands() == 2); + Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]); + Phi.setIncomingValue(0, NewValue); + Phi.removeIncomingValue(1); + } + } + + // Now that all the basic blocks for the unrolled iterations are in place, + // finish up connecting the blocks and phi nodes. At this point LastValueMap + // is the last unrolled iterations values. + + // Update Phis in BB from OldBB to point to NewBB + auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB, + BasicBlock *NewBB) { + for (PHINode &Phi : BB->phis()) { + int I = Phi.getBasicBlockIndex(OldBB); + Phi.setIncomingBlock(I, NewBB); + } + }; + // Update Phis in BB from OldBB to point to NewBB and use the latest value + // from LastValueMap + auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, + BasicBlock *NewBB, + ValueToValueMapTy &LastValueMap) { + for (PHINode &Phi : BB->phis()) { + for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) { + if (Phi.getIncomingBlock(b) == OldBB) { + Value *OldValue = Phi.getIncomingValue(b); + if (Value *LastValue = LastValueMap[OldValue]) + Phi.setIncomingValue(b, LastValue); + Phi.setIncomingBlock(b, NewBB); + break; + } + } + } + }; + // Move all the phis from Src into Dest + auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) { + Instruction *insertPoint = Dest->getFirstNonPHI(); + while (PHINode *Phi = dyn_cast<PHINode>(Src->begin())) + Phi->moveBefore(insertPoint); + }; + + // Update the PHI values outside the loop to point to the last block + updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(), + LastValueMap); + + // Update ForeBlocks successors and phi nodes + BranchInst *ForeTerm = + cast<BranchInst>(ForeBlocksLast.back()->getTerminator()); + BasicBlock *Dest = SubLoopBlocksFirst[0]; + ForeTerm->setSuccessor(0, Dest); + + if (CompletelyUnroll) { + while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) { + Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader)); + Phi->getParent()->getInstList().erase(Phi); + } + } else { + // Update the PHI values to point to the last aft block + updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0], + AftBlocksLast.back(), LastValueMap); + } + + for (unsigned It = 1; It != Count; It++) { + // Remap ForeBlock successors from previous iteration to this + BranchInst *ForeTerm = + cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator()); + BasicBlock *Dest = ForeBlocksFirst[It]; + ForeTerm->setSuccessor(0, Dest); + } + + // Subloop successors and 
phis + BranchInst *SubTerm = + cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); + SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); + SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); + updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0], + ForeBlocksLast.back()); + updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); + + for (unsigned It = 1; It != Count; It++) { + // Replace the conditional branch of the previous iteration's subloop with + // an unconditional branch to this one + BranchInst *SubTerm = + cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator()); + BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); + SubTerm->eraseFromParent(); + + updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It], + ForeBlocksLast.back()); + updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); + movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); + } + + // Aft blocks successors and phis + BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator()); + if (CompletelyUnroll) { + BranchInst::Create(LoopExit, Term); + Term->eraseFromParent(); + } else { + Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]); + } + updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0], + SubLoopBlocksLast.back()); + + for (unsigned It = 1; It != Count; It++) { + // Replace the conditional branch of the previous iteration's aft block + // with an unconditional branch to this one + BranchInst *AftTerm = + cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator()); + BranchInst::Create(AftBlocksFirst[It], AftTerm); + AftTerm->eraseFromParent(); + + updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It], + SubLoopBlocksLast.back()); + movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); + } + + // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the + // new ones required. + if (Count != 1) { + SmallVector<DominatorTree::UpdateType, 4> DTUpdates; + DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0], + SubLoopBlocksFirst[0]); + DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, + SubLoopBlocksLast[0], AftBlocksFirst[0]); + + DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, + ForeBlocksLast.back(), SubLoopBlocksFirst[0]); + DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, + SubLoopBlocksLast.back(), AftBlocksFirst[0]); + DT->applyUpdates(DTUpdates); + } + + // Merge adjacent basic blocks, if possible. + SmallPtrSet<BasicBlock *, 16> MergeBlocks; + MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); + MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); + MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); + while (!MergeBlocks.empty()) { + BasicBlock *BB = *MergeBlocks.begin(); + BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); + if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) { + BasicBlock *Dest = Term->getSuccessor(0); + if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) { + // Don't remove BB and add Fold as they are the same BB + assert(Fold == BB); + (void)Fold; + MergeBlocks.erase(Dest); + } else + MergeBlocks.erase(BB); + } else + MergeBlocks.erase(BB); + } + + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go.
+ simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC); + + NumCompletelyUnrolledAndJammed += CompletelyUnroll; + ++NumUnrolledAndJammed; + +#ifndef NDEBUG + // We shouldn't have done anything to break loop simplify form or LCSSA. + Loop *OuterL = L->getParentLoop(); + Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop); + assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI)); + if (!CompletelyUnroll) + assert(L->isLoopSimplifyForm()); + assert(SubLoop->isLoopSimplifyForm()); + assert(DT->verify()); +#endif + + // Update LoopInfo if the loop is completely removed. + if (CompletelyUnroll) + LI->erase(L); + + return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled + : LoopUnrollResult::PartiallyUnrolled; +} + +static bool getLoadsAndStores(BasicBlockSet &Blocks, + SmallVector<Value *, 4> &MemInstr) { + // Scan the BBs and collect legal loads and stores. + // Returns false if non-simple loads/stores are found. + for (BasicBlock *BB : Blocks) { + for (Instruction &I : *BB) { + if (auto *Ld = dyn_cast<LoadInst>(&I)) { + if (!Ld->isSimple()) + return false; + MemInstr.push_back(&I); + } else if (auto *St = dyn_cast<StoreInst>(&I)) { + if (!St->isSimple()) + return false; + MemInstr.push_back(&I); + } else if (I.mayReadOrWriteMemory()) { + return false; + } + } + } + return true; +} + +static bool checkDependencies(SmallVector<Value *, 4> &Earlier, + SmallVector<Value *, 4> &Later, + unsigned LoopDepth, bool InnerLoop, + DependenceInfo &DI) { + // Use DA to check for dependencies between loads and stores that make unroll + // and jam invalid. + for (Value *I : Earlier) { + for (Value *J : Later) { + Instruction *Src = cast<Instruction>(I); + Instruction *Dst = cast<Instruction>(J); + if (Src == Dst) + continue; + // Ignore Input dependencies. + if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) + continue; + + // Track dependencies, and if we find them take a conservative approach + // by allowing only = or < (not >), although some > would be safe + // (depending upon unroll width).
+ // For the inner loop, we need to disallow any (> <) dependencies + // FIXME: Allow > so long as distance is less than unroll width + if (auto D = DI.depends(Src, Dst, true)) { + assert(D->isOrdered() && "Expected an output, flow or anti dep."); + + if (D->isConfused()) + return false; + if (!InnerLoop) { + if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) + return false; + } else { + assert(LoopDepth + 1 <= D->getLevels()); + if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT && + D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) + return false; + } + } + } + } + return true; +} + +static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks, + BasicBlockSet &SubLoopBlocks, + BasicBlockSet &AftBlocks, DependenceInfo &DI) { + // Get all load/store pairs for each block set + SmallVector<Value *, 4> ForeMemInstr; + SmallVector<Value *, 4> SubLoopMemInstr; + SmallVector<Value *, 4> AftMemInstr; + if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) || + !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) || + !getLoadsAndStores(AftBlocks, AftMemInstr)) + return false; + + // Check for dependencies between any blocks that may change order + unsigned LoopDepth = L->getLoopDepth(); + return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false, + DI) && + checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) && + checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false, + DI) && + checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true, + DI); +} + +bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, + DependenceInfo &DI) { + /* We currently handle outer loops like this: + | + ForeFirst <----\ } + Blocks | } ForeBlocks + ForeLast | } + | | + SubLoopFirst <\ | } + Blocks | | } SubLoopBlocks + SubLoopLast -/ | } + | | + AftFirst | } + Blocks | } AftBlocks + AftLast ------/ } + | + + There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks + and AftBlocks, provided that there is one edge from Fores to SubLoops, + one edge from SubLoops to Afts and a single outer loop exit (from Afts). + In practice we currently limit Aft blocks to a single block, and limit + things further in the profitability checks of the unroll and jam pass. + + Because of the way we rearrange basic blocks, we also require that + the Fore blocks on all unrolled iterations are safe to move before the + SubLoop blocks of all iterations. So we require that the phi node looping + operands of ForeHeader can be moved to at least the end of ForeEnd, so that + we can arrange cloned Fore Blocks before the subloop and match up Phi's + correctly. + + i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2. + It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2. + + There are then a number of checks along the lines of no calls, no + exceptions, inner loop IV is consistent, etc. Note that for loops requiring + runtime unrolling, UnrollRuntimeLoopRemainder can also fail in + UnrollAndJamLoop if the trip count cannot be easily calculated.
+ */ + + if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1) + return false; + Loop *SubLoop = L->getSubLoops()[0]; + if (!SubLoop->isLoopSimplifyForm()) + return false; + + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Exit = L->getExitingBlock(); + BasicBlock *SubLoopHeader = SubLoop->getHeader(); + BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); + BasicBlock *SubLoopExit = SubLoop->getExitingBlock(); + + if (Latch != Exit) + return false; + if (SubLoopLatch != SubLoopExit) + return false; + + if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) + return false; + + // Split blocks into Fore/SubLoop/Aft based on dominators + BasicBlockSet SubLoopBlocks; + BasicBlockSet ForeBlocks; + BasicBlockSet AftBlocks; + if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, + AftBlocks, &DT)) + return false; + + // Instructions may need to move from the aft blocks to the fore blocks, + // which becomes more difficult if there are multiple (potentially + // conditionally executed) blocks. For now we just exclude loops with + // multiple aft blocks. + if (AftBlocks.size() != 1) + return false; + + // Check the inner loop IV is consistent between all iterations + const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch); + if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) || + !SubLoopBECountSC->getType()->isIntegerTy()) + return false; + ScalarEvolution::LoopDisposition LD = + SE.getLoopDisposition(SubLoopBECountSC, L); + if (LD != ScalarEvolution::LoopInvariant) + return false; + + // Check the loop safety info for exceptions. + LoopSafetyInfo LSI; + computeLoopSafetyInfo(&LSI, L); + if (LSI.MayThrow) + return false; + + // We've ruled out the easy stuff and now need to check that there are no + // interdependencies which may prevent us from moving: + // ForeBlocks before Subloop and AftBlocks. + // Subloop before AftBlocks. + // ForeBlock phi operands before the subloop + + // Make sure we can move all instructions we need to before the subloop + if (!processHeaderPhiOperands( + Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) { + if (SubLoop->contains(I->getParent())) + return false; + if (AftBlocks.count(I->getParent())) { + // If we hit a phi node in afts we know we are done (probably + // LCSSA) + if (isa<PHINode>(I)) + return false; + // Can't move instructions with side effects or memory + // reads/writes + if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()) + return false; + } + // Keep going + return true; + })) + return false; + + // Check for memory dependencies which prohibit the unrolling we are doing. + // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check + // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
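As a hypothetical illustration of what these checks must reject (array name invented): a dependence carried forward by the outer loop but backward by the inner loop gets reordered by jamming:

    // The store at outer iteration i feeds the load at outer iteration
    // i+1 but at an earlier inner index:
    for (int i = 0; i < N - 1; ++i)
      for (int j = 1; j < M; ++j)
        A[i + 1][j - 1] = A[i][j] + 1;
    // After jamming by 2, the copy for i+1 would read A[i+1][j-1] at
    // inner step j-1, before the copy for i has stored it at inner
    // step j, so the jammed loop would compute different values.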
+ if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) + return false; + + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index c84ae7d693d7..13794c53f24b 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" @@ -30,6 +31,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -46,6 +48,7 @@ #include <limits> using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-unroll" @@ -66,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance = std::numeric_limits<unsigned>::max(); // Check whether we are capable of peeling this loop. -static bool canPeel(Loop *L) { +bool llvm::canPeel(Loop *L) { // Make sure the loop is in simplified form if (!L->isLoopSimplifyForm()) return false; @@ -136,11 +139,109 @@ static unsigned calculateIterationsToInvariance( return ToInvariance; } +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. +// for (i = 0; i < n; i++) +// if (i < 2) +// .. +// else +// .. +// } +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, + ScalarEvolution &SE) { + assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); + unsigned DesiredPeelCount = 0; + + for (auto *BB : L.blocks()) { + auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + + // Ignore loop exit condition. + if (L.getLoopLatch() == BB) + continue; + + Value *Condition = BI->getCondition(); + Value *LeftVal, *RightVal; + CmpInst::Predicate Pred; + if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) + continue; + + const SCEV *LeftSCEV = SE.getSCEV(LeftVal); + const SCEV *RightSCEV = SE.getSCEV(RightVal); + + // Do not consider predicates that are known to be true or false + // independently of the loop iteration. + if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || + SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, + RightSCEV)) + continue; + + // Check if we have a condition with one AddRec and one non AddRec + // expression. Normalize LeftSCEV to be the AddRec. + if (!isa<SCEVAddRecExpr>(LeftSCEV)) { + if (isa<SCEVAddRecExpr>(RightSCEV)) { + std::swap(LeftSCEV, RightSCEV); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else + continue; + } + + const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); + + // Avoid huge SCEV computations in the loop below, make sure we only + // consider AddRecs of the loop we are trying to peel and avoid + // non-monotonic predicates, as we will not be able to simplify the loop + // body. + // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can + // simplify the loop, if we peel 1 additional iteration, if there + // is no wrapping. 
+ bool Increasing; + if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || + !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) + continue; + (void)Increasing; + + // Check if extending the current DesiredPeelCount lets us evaluate Pred + // or !Pred in the loop body statically. + unsigned NewPeelCount = DesiredPeelCount; + + const SCEV *IterVal = LeftAR->evaluateAtIteration( + SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); + + // If the original condition is not known, get the negated predicate + // (which holds on the else branch) and check if it is known. This allows + // us to peel off iterations that make the original condition false. + if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV)) + Pred = ICmpInst::getInversePredicate(Pred); + + const SCEV *Step = LeftAR->getStepRecurrence(SE); + while (NewPeelCount < MaxPeelCount && + SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { + IterVal = SE.getAddExpr(IterVal, Step); + NewPeelCount++; + } + + // Only peel the loop if the monotonic predicate !Pred becomes known in the + // first iteration of the loop body after peeling. + if (NewPeelCount > DesiredPeelCount && + SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, + RightSCEV)) + DesiredPeelCount = NewPeelCount; + } + + return DesiredPeelCount; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount) { + unsigned &TripCount, ScalarEvolution &SE) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); + // Save the UP.PeelCount value set by the target in + // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + unsigned TargetPeelCount = UP.PeelCount; UP.PeelCount = 0; if (!canPeel(L)) return; @@ -149,6 +250,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!L->empty()) return; + // If the user provided a peel count, use that. + bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; + if (UserPeelCount) { + LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount + << " iterations.\n"); + UP.PeelCount = UnrollForcePeelCount; + return; + } + + // Skip peeling if it's disabled. + if (!UP.AllowPeeling) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number of iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -160,7 +274,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; // Now go through all Phis to calculate the number of iterations they // need to become invariants. - unsigned DesiredPeelCount = 0; + // Start the max computation with the UP.PeelCount value set by the target + // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + unsigned DesiredPeelCount = TargetPeelCount; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { @@ -170,15 +286,21 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (ToInvariance != InfiniteIterationsToInvariance) DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } + + // Pay respect to limitations implied by loop size and the max peel count.
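A hypothetical source-level view of what countToEliminateCompares is looking for (function names invented):

    // With a peel count of 2, 'i < 2' is true only in the peeled copies
    // and folds away in the remaining loop body:
    for (int i = 0; i < n; ++i) {
      if (i < 2) setup();        // taken only for i == 0 and i == 1
      else       steady();
    }
    // After peeling two iterations (entry guards kept for small n):
    if (n > 0) setup();          // i == 0
    if (n > 1) setup();          // i == 1
    for (int i = 2; i < n; ++i)
      steady();                  // the branch is gone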
+ unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); + if (DesiredPeelCount > 0) { - // Pay respect to limitations implied by loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" - << " some Phis into invariants.\n"); + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); UP.PeelCount = DesiredPeelCount; return; } @@ -189,44 +311,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; - // If the user provided a peel count, use that. - bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; - if (UserPeelCount) { - DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount - << " iterations.\n"); - UP.PeelCount = UnrollForcePeelCount; - return; - } - // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section. // We only do this in the presence of profile information, since otherwise // our estimates of the trip count are not reliable enough. - if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) { + if (L->getHeader()->getParent()->hasProfileData()) { Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L); if (!PeelCount) return; - DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount - << "\n"); + LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount + << "\n"); if (*PeelCount) { if ((*PeelCount <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { - DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); + LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount + << " iterations.\n"); UP.PeelCount = *PeelCount; return; } - DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); - DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); - DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); - DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); + LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) + << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); } } } -/// \brief Update the branch weights of the latch of a peeled-off loop +/// Update the branch weights of the latch of a peeled-off loop /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. @@ -267,12 +382,12 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, } } -/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p +/// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. /// \param Exit The exit block of the original loop. 
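Worked numbers for the profile-driven path above (all values hypothetical): with a profile-estimated trip count of 3, LoopSize = 10 and UP.Threshold = 150, both guards pass and the loop is peeled three times:

    // *PeelCount == 3 (from profile), LoopSize == 10, UP.Threshold == 150:
    //   3 <= UnrollPeelMaxCount           (assuming the cap is >= 3)
    //   10 * (3 + 1) == 40 <= 150         => UP.PeelCount = 3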
-/// \param[out] NewBlocks A list of the the blocks in the newly created clone +/// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. /// \param LVMap A value-map that maps instructions from the original loop to @@ -376,7 +491,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, LVMap[KV.first] = KV.second; } -/// \brief Peel off the first \p PeelCount iterations of loop \p L. +/// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. /// Rather, each iteration is peeled off separately, and needs to check the @@ -388,8 +503,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { - if (!canPeel(L)) - return false; + assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); + assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); @@ -500,10 +615,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // the original loop body. if (Iter == 0) DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch])); -#ifndef NDEBUG - if (VerifyDomInfo) - DT->verifyDomTree(); -#endif + assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter, diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index f79f423ce019..0057b4ba7ce1 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -33,7 +33,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -418,8 +418,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, // UnrollRuntimeMultiExit is true. This will need updating the logic in // connectEpilog/connectProlog. if (!LatchExit->getSinglePredecessor()) { - DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " - "predecessor.\n"); + LLVM_DEBUG( + dbgs() << "Bailout for multi-exit handling when latch exit has >1 " + "predecessor.\n"); return false; } // FIXME: We bail out of multi-exit unrolling when epilog loop is generated @@ -528,14 +529,14 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA) { - DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); - DEBUG(L->dump()); - DEBUG(UseEpilogRemainder ? 
dbgs() << "Using epilog remainder.\n" : - dbgs() << "Using prolog remainder.\n"); + LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + LLVM_DEBUG(L->dump()); + LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" + : dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { - DEBUG(dbgs() << "Not in simplify form!\n"); + LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); return false; } @@ -561,7 +562,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { - DEBUG( + LLVM_DEBUG( dbgs() << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); @@ -581,7 +582,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || !BECountSC->getType()->isIntegerTy()) { - DEBUG(dbgs() << "Could not compute exit block SCEV\n"); + LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } @@ -591,7 +592,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) { - DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); + LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; } @@ -601,15 +602,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { - DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); + LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) { - DEBUG(dbgs() - << "Count failed constraint on overflow trip count calculation.\n"); + LLVM_DEBUG( + dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; } @@ -763,7 +765,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // values from the cloned region. Also update the dominator info for // OtherExits and their immediate successors, since we have new edges into // OtherExits. - SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; + SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; for (auto *BB : OtherExits) { for (auto &II : *BB) { @@ -878,10 +880,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } - // If this loop is nested, then the loop unroller changes the code in the - // parent loop, so the Scalar Evolution pass needs to be run again. - if (Loop *ParentLoop = L->getParentLoop()) - SE->forgetLoop(ParentLoop); + // If this loop is nested, then the loop unroller changes the code in the any + // of its parent loops, so the Scalar Evolution pass needs to be run again. + SE->forgetTopmostLoop(L); // Canonicalize to LoopSimplifyForm both original and remainder loops. 
We // cannot rely on the LoopUnrollPass to do this because it only does @@ -897,7 +898,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } if (remainderLoop && UnrollRemainder) { - DEBUG(dbgs() << "Unrolling remainder loop\n"); + LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index 0a357f4b5004..46af120a428b 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -16,8 +16,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -553,47 +555,48 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); return true; } if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, AC, DT)) { - DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi + << "\n"); return true; } // Not a reduction of known type.
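For orientation, a minimal sketch of how a client might drive the interface exercised by this hunk; the analyses (DemandedBits, AssumptionCache, DominatorTree) are assumed to come from the usual wrapper passes, and the helper name collectReductionPHIs is invented for illustration:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

// Hypothetical helper: gather every loop-header PHI that isReductionPHI
// recognizes. Internally it probes each recurrence kind (add, mul, and, or,
// xor, integer/float min-max, fadd, fmul) in turn, exactly as the chain of
// AddReductionVar calls above shows.
static void collectReductionPHIs(Loop *L, DemandedBits *DB,
                                 AssumptionCache *AC, DominatorTree *DT,
                                 SmallVectorImpl<PHINode *> &Reductions) {
  for (PHINode &Phi : L->getHeader()->phis()) {
    RecurrenceDescriptor RedDes;
    if (RecurrenceDescriptor::isReductionPHI(&Phi, L, RedDes, DB, AC, DT))
      Reductions.push_back(&Phi);
  }
}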
@@ -921,13 +924,13 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, } /// This function is called when we suspect that the update-chain of a phi node -/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, -/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime -/// predicate P under which the SCEV expression for the phi can be the -/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the -/// cast instructions that are involved in the update-chain of this induction. -/// A caller that adds the required runtime predicate can be free to drop these -/// cast instructions, and compute the phi using \p AR (instead of some scev +/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts, +/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime +/// predicate P under which the SCEV expression for the phi can be the +/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the +/// cast instructions that are involved in the update-chain of this induction. +/// A caller that adds the required runtime predicate can be free to drop these +/// cast instructions, and compute the phi using \p AR (instead of some scev /// expression with casts). /// /// For example, without a predicate the scev expression can take the following @@ -962,7 +965,7 @@ static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); const Loop *L = AR->getLoop(); - // Find any cast instructions that participate in the def-use chain of + // Find any cast instructions that participate in the def-use chain of // PhiScev in the loop. // FORNOW/TODO: We currently expect the def-use chain to include only // two-operand instructions, where one of the operands is an invariant. @@ -1050,7 +1053,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, AR = PSE.getAsAddRec(Phi); if (!AR) { - DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } @@ -1084,14 +1087,15 @@ bool InductionDescriptor::isInductionPHI( const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { - DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); return false; } if (AR->getLoop() != TheLoop) { // FIXME: We should treat this as a uniform. Unfortunately, we // don't currently know how to handle uniform PHIs.
- DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); + LLVM_DEBUG( + dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); return false; } @@ -1172,11 +1176,12 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); if (!NewExitBB) - DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " - << *L << "\n"); + LLVM_DEBUG( + dbgs() << "WARNING: Can't create a dedicated exit block for loop: " + << *L << "\n"); else - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewExitBB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); return true; }; @@ -1199,7 +1204,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, return Changed; } -/// \brief Returns the instructions that use values defined in the loop. +/// Returns the instructions that use values defined in the loop. SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; @@ -1276,7 +1281,7 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) } -/// \brief Find string metadata for loop +/// Find string metadata for loop /// /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an /// operand or null otherwise. If the string metadata is not found return @@ -1428,6 +1433,32 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, DT->deleteEdge(Preheader, L->getHeader()); } + // Given LCSSA form is satisfied, we should not have users of instructions + // within the dead loop outside of the loop. However, LCSSA doesn't take + // unreachable uses into account. We handle them here. + // We could do it after drop all references (in this case all users in the + // loop will be already eliminated and we have less work to do but according + // to API doc of User::dropAllReferences only valid operation after dropping + // references, is deletion. So let's substitute all usages of + // instruction from the loop with undef value of corresponding type first. + for (auto *Block : L->blocks()) + for (Instruction &I : *Block) { + auto *Undef = UndefValue::get(I.getType()); + for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { + Use &U = *UI; + ++UI; + if (auto *Usr = dyn_cast<Instruction>(U.getUser())) + if (L->contains(Usr->getParent())) + continue; + // If we have a DT then we can check that uses outside a loop only in + // unreachable block. + if (DT) + assert(!DT->isReachableFromEntry(U) && + "Unexpected user in reachable block"); + U.set(Undef); + } + } + // Remove the block from the reference counting scheme, so that we can // delete it freely later. for (auto *Block : L->blocks()) @@ -1455,54 +1486,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, } } -/// Returns true if the instruction in a loop is guaranteed to execute at least -/// once. -bool llvm::isGuaranteedToExecute(const Instruction &Inst, - const DominatorTree *DT, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo) { - // We have to check to make sure that the instruction dominates all - // of the exit blocks. If it doesn't, then there is a path out of the loop - // which does not execute this instruction, so we can't hoist it. 
- - // If the instruction is in the header block for the loop (which is very - // common), it is always guaranteed to dominate the exit blocks. Since this - // is a common case, and can save some work, check it now. - if (Inst.getParent() == CurLoop->getHeader()) - // If there's a throw in the header block, we can't guarantee we'll reach - // Inst. - return !SafetyInfo->HeaderMayThrow; - - // Somewhere in this loop there is an instruction which may throw and make us - // exit the loop. - if (SafetyInfo->MayThrow) - return false; - - // Get the exit blocks for the current loop. - SmallVector<BasicBlock *, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - - // Verify that the block dominates each of the exit blocks of the loop. - for (BasicBlock *ExitBlock : ExitBlocks) - if (!DT->dominates(Inst.getParent(), ExitBlock)) - return false; - - // As a degenerate case, if the loop is statically infinite then we haven't - // proven anything since there are no exit blocks. - if (ExitBlocks.empty()) - return false; - - // FIXME: In general, we have to prove that the loop isn't an infinite loop. - // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is - // just a special case of this.) - return true; -} - Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { // Only support loops with a unique exiting block, and a latch. if (!L->getExitingBlock()) return None; - // Get the branch weights for the the loop's backedge. + // Get the branch weights for the loop's backedge. BranchInst *LatchBR = dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); if (!LatchBR || LatchBR->getNumSuccessors() != 2) @@ -1530,7 +1519,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { return (FalseVal + (TrueVal / 2)) / TrueVal; } -/// \brief Adds a 'fast' flag to floating point operations. +/// Adds a 'fast' flag to floating point operations. static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; @@ -1540,6 +1529,38 @@ static Value *addFastMathFlag(Value *V) { return V; } +// Helper to generate an ordered reduction. +Value * +llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, + unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, + ArrayRef<Value *> RedOps) { + unsigned VF = Src->getType()->getVectorNumElements(); + + // Extract and apply reduction ops in ascending order: + // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[VF-1] + Value *Result = Acc; + for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { + Value *Ext = + Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, + "bin.rdx"); + } else { + assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + "Invalid min/max"); + Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result, + Ext); + } + + if (!RedOps.empty()) + propagateIRFlags(Result, RedOps); + } + + return Result; +} + // Helper to generate a log2 shuffle reduction.
Value * llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 29756d9dab7f..abbcd5f9e3b8 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -140,9 +140,12 @@ void LoopVersioning::addPHINodes( if (!PN) { PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", &PHIBlock->front()); - for (auto *User : Inst->users()) - if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) - User->replaceUsesOfWith(Inst, PN); + SmallVector<User*, 8> UsersToUpdate; + for (User *U : Inst->users()) + if (!VersionedLoop->contains(cast<Instruction>(U)->getParent())) + UsersToUpdate.push_back(U); + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(Inst, PN); PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); } } @@ -248,7 +251,7 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, } namespace { -/// \brief Also expose this is a pass. Currently this is only used for +/// Also expose this as a pass. Currently this is only used for /// unit-testing. It adds all memchecks necessary to remove all may-aliasing /// array accesses from the loop. class LoopVersioningPass : public FunctionPass { diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index ee84541e526d..c852d538b0d1 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -21,7 +21,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; #define DEBUG_TYPE "lowerinvoke" @@ -48,10 +48,12 @@ static bool runImpl(Function &F) { bool Changed = false; for (BasicBlock &BB : F) if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { - SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); // Insert a normal call instruction...
CallInst *NewCall = - CallInst::Create(II->getCalledValue(), CallArgs, "", II); + CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 57dc225e9dab..03006ef3a2d3 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -409,8 +409,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ CI, - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), + /* SrcAlign */ Memcpy->getSourceAlignment(), + /* DestAlign */ Memcpy->getDestAlignment(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransformInfo */ TTI); @@ -419,8 +419,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ Memcpy->getLength(), - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), + /* SrcAlign */ Memcpy->getSourceAlignment(), + /* DestAlign */ Memcpy->getDestAlignment(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransformInfo */ TTI); @@ -432,8 +432,8 @@ void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { /* SrcAddr */ Memmove->getRawSource(), /* DstAddr */ Memmove->getRawDest(), /* CopyLen */ Memmove->getLength(), - /* SrcAlign */ Memmove->getAlignment(), - /* DestAlign */ Memmove->getAlignment(), + /* SrcAlign */ Memmove->getSourceAlignment(), + /* DestAlign */ Memmove->getDestAlignment(), /* SrcIsVolatile */ Memmove->isVolatile(), /* DstIsVolatile */ Memmove->isVolatile()); } @@ -443,6 +443,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) { /* DstAddr */ Memset->getRawDest(), /* CopyLen */ Memset->getLength(), /* SetValue */ Memset->getValue(), - /* Alignment */ Memset->getAlignment(), + /* Alignment */ Memset->getDestAlignment(), Memset->isVolatile()); } diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 344cb35df986..e99ecfef19cd 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -29,7 +29,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <cassert> @@ -74,7 +74,7 @@ namespace { LowerSwitch() : FunctionPass(ID) { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); - } + } bool runOnFunction(Function &F) override; @@ -155,11 +155,8 @@ bool LowerSwitch::runOnFunction(Function &F) { } /// Used for debugging purposes.
-static raw_ostream& operator<<(raw_ostream &O, - const LowerSwitch::CaseVector &C) - LLVM_ATTRIBUTE_USED; - -static raw_ostream& operator<<(raw_ostream &O, +LLVM_ATTRIBUTE_USED +static raw_ostream &operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; @@ -172,7 +169,7 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } -/// \brief Update the first occurrence of the "switch statement" BB in the PHI +/// Update the first occurrence of the "switch statement" BB in the PHI /// node with the "new" BB. The other occurrences will: /// /// 1) Be updated by subsequent calls to this function. Switch statements may @@ -245,14 +242,13 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); - DEBUG(dbgs() << "LHS: " << LHS << "\n"); + LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n"); std::vector<CaseRange> RHS(Begin + Mid, End); - DEBUG(dbgs() << "RHS: " << RHS << "\n"); + LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n"); CaseRange &Pivot = *(Begin + Mid); - DEBUG(dbgs() << "Pivot ==> " - << Pivot.Low->getValue() - << " -" << Pivot.High->getValue() << "\n"); + LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -" + << Pivot.High->getValue() << "\n"); // NewLowerBound here should never be the integer minimal value. // This is because it is computed from a case range that is never @@ -274,20 +270,14 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, NewUpperBound = LHS.back().High; } - DEBUG(dbgs() << "LHS Bounds ==> "; - if (LowerBound) { - dbgs() << LowerBound->getSExtValue(); - } else { - dbgs() << "NONE"; - } - dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; - dbgs() << "RHS Bounds ==> "; - dbgs() << NewLowerBound->getSExtValue() << " - "; - if (UpperBound) { - dbgs() << UpperBound->getSExtValue() << "\n"; - } else { - dbgs() << "NONE\n"; - }); + LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { + dbgs() << LowerBound->getSExtValue(); + } else { dbgs() << "NONE"; } dbgs() << " - " + << NewUpperBound->getSExtValue() << "\n"; + dbgs() << "RHS Bounds ==> "; + dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { + dbgs() << UpperBound->getSExtValue() << "\n"; + } else { dbgs() << "NONE\n"; }); // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. 
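For intuition, the recursion that switchConvert implements is an ordinary binary search over the sorted, clusterified case ranges. A self-contained model of that shape follows (illustrative only, not the pass's code: the real version emits basic blocks with ICmpInst/BranchInst rather than returning a target):

#include <cstdint>
#include <vector>

// Each cluster covers [Lo, Hi] and jumps to Target; Cases is sorted by Lo.
struct CaseRangeModel { int64_t Lo, Hi; int Target; };

static int lowerSwitchModel(const std::vector<CaseRangeModel> &Cases,
                            size_t Begin, size_t End, int64_t Val,
                            int DefaultTarget) {
  if (End - Begin == 1) {
    // Leaf: the range test that newLeafBlock materializes below.
    const CaseRangeModel &Leaf = Cases[Begin];
    return (Val >= Leaf.Lo && Val <= Leaf.Hi) ? Leaf.Target : DefaultTarget;
  }
  // Split at the median cluster (the pivot) and branch on "Val < Pivot.Lo",
  // the same comparison the pass emits before recursing into each half.
  size_t Mid = Begin + (End - Begin) / 2;
  if (Val < Cases[Mid].Lo)
    return lowerSwitchModel(Cases, Begin, Mid, Val, DefaultTarget);
  return lowerSwitchModel(Cases, Mid, End, Val, DefaultTarget);
}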
@@ -337,7 +327,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, } else if (Leaf.Low->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, - "SwitchLeaf"); + "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); @@ -364,7 +354,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, for (uint64_t j = 0; j < Range; ++j) { PN->removeIncomingValue(OrigBlock); } - + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); assert(BlockIdx != -1 && "Switch didn't go to this successor??"); PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); @@ -382,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(), Case.getCaseSuccessor())); - std::sort(Cases.begin(), Cases.end(), CaseCmp()); + llvm::sort(Cases.begin(), Cases.end(), CaseCmp()); // Merge case into clusters if (Cases.size() >= 2) { @@ -443,9 +433,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, // Prepare cases vector. CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << "\n"); - DEBUG(dbgs() << "Cases: " << Cases << "\n"); + LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; ConstantInt *LowerBound = nullptr; @@ -505,6 +495,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, } #endif + // As the default block in the switch is unreachable, update the PHI nodes + // (remove the entry to the default block) to reflect this. + Default->removePredecessor(OrigBlock); + // Use the most popular block as the new default, reducing the number of // cases. assert(MaxPop > 0 && PopSucc); @@ -518,29 +512,33 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, if (Cases.empty()) { BranchInst::Create(Default, CurBlock); SI->eraseFromParent(); + // As all the cases have been replaced with a single branch, only keep + // one entry in the PHI nodes. + for (unsigned I = 0 ; I < (MaxPop - 1) ; ++I) + PopSucc->removePredecessor(OrigBlock); return; } } + unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0; + for (const auto &Case : SI->cases()) + if (Case.getCaseSuccessor() == Default) + NrOfDefaults++; + // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); F->getBasicBlockList().insert(Default->getIterator(), NewDefault); BranchInst::Create(Default, NewDefault); - // If there is an entry in any PHI nodes for the default edge, make sure - // to update them as well. - for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - int BlockIdx = PN->getBasicBlockIndex(OrigBlock); - assert(BlockIdx != -1 && "Switch didn't go to this successor??"); - PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); - } - BasicBlock *SwitchBlock = switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, OrigBlock, OrigBlock, NewDefault, UnreachableRanges); + // If there are entries in any PHI nodes for the default edge, make sure + // to update them as well. + fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults); + // Branch to our shiny new if-then stuff... 
BranchInst::Create(SwitchBlock, OrigBlock); diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 29f289b62da0..23145e584751 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -22,7 +22,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <vector> diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 0f7bd76c03ca..323f2552ca80 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -29,7 +29,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp index dc780542ce68..6d0b96f6aa8a 100644 --- a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp @@ -14,19 +14,38 @@ #include "llvm/Transforms/Utils/OrderedInstructions.h" using namespace llvm; +bool OrderedInstructions::localDominates(const Instruction *InstA, + const Instruction *InstB) const { + assert(InstA->getParent() == InstB->getParent() && + "Instructions must be in the same basic block"); + + const BasicBlock *IBB = InstA->getParent(); + auto OBB = OBBMap.find(IBB); + if (OBB == OBBMap.end()) + OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; + return OBB->second->dominates(InstA, InstB); +} + /// Given 2 instructions, use OrderedBasicBlock to check for dominance relation /// if the instructions are in the same basic block; otherwise, use the /// dominator tree. bool OrderedInstructions::dominates(const Instruction *InstA, const Instruction *InstB) const { - const BasicBlock *IBB = InstA->getParent(); // Use ordered basic block to do dominance check in case the 2 instructions // are in the same basic block. - if (IBB == InstB->getParent()) { - auto OBB = OBBMap.find(IBB); - if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; - return OBB->second->dominates(InstA, InstB); - } + if (InstA->getParent() == InstB->getParent()) + return localDominates(InstA, InstB); return DT->dominates(InstA->getParent(), InstB->getParent()); } + +bool OrderedInstructions::dfsBefore(const Instruction *InstA, + const Instruction *InstB) const { + // Use ordered basic block in case the 2 instructions are in the same basic + // block.
+ if (InstA->getParent() == InstB->getParent()) + return localDominates(InstA, InstB); + + DomTreeNode *DA = DT->getNode(InstA->getParent()); + DomTreeNode *DB = DT->getNode(InstB->getParent()); + return DA->getDFSNumIn() < DB->getDFSNumIn(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp index d47be6ea566b..2923977b791a 100644 --- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -24,6 +25,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -32,7 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/OrderedInstructions.h" #include <algorithm> #define DEBUG_TYPE "predicateinfo" @@ -118,7 +120,7 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A, return false; if (ArgA && ArgB) return ArgA->getArgNo() < ArgB->getArgNo(); - return OI.dominates(cast<Instruction>(A), cast<Instruction>(B)); + return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B)); } // This compares ValueDFS structures, creating OrderedBasicBlocks where @@ -479,6 +481,19 @@ void PredicateInfo::buildPredicateInfo() { renameUses(OpsToRename); } +// Create a ssa_copy declaration with custom mangling, because +// Intrinsic::getDeclaration does not handle overloaded unnamed types properly: +// all unnamed types get mangled to the same string. We use the pointer +// to the type as name here, as it guarantees unique names for different +// types and we remove the declarations when destroying PredicateInfo. +// It is a workaround for PR38117, because solving it in a fully general way is +// tricky (FIXME). +static Function *getCopyDeclaration(Module *M, Type *Ty) { + std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty); + return cast<Function>(M->getOrInsertFunction( + Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty))); +} + // Given the renaming stack, make all the operands currently on the stack real // by inserting them into the IR. Return the last operation's value. Value *PredicateInfo::materializeStack(unsigned int &Counter, @@ -507,8 +522,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // order in the case of multiple predicateinfo in the same block. if (isa<PredicateWithEdge>(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); - Function *IF = Intrinsic::getDeclaration( - F.getParent(), Intrinsic::ssa_copy, Op->getType()); + Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); + if (IF->user_begin() == IF->user_end()) + CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." 
+ Twine(Counter++)); PredicateMap.insert({PIC, ValInfo}); @@ -518,8 +534,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, assert(PAssume && "Should not have gotten here without it being an assume"); IRBuilder<> B(PAssume->AssumeInst); - Function *IF = Intrinsic::getDeclaration( - F.getParent(), Intrinsic::ssa_copy, Op->getType()); + Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); + if (IF->user_begin() == IF->user_end()) + CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PredicateMap.insert({PIC, ValInfo}); Result.Def = PIC; @@ -553,10 +570,11 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { auto Comparator = [&](const Value *A, const Value *B) { return valueComesBefore(OI, A, B); }; - std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator); + llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator); ValueDFS_Compare Compare(OI); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { + LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); unsigned Counter = 0; SmallVector<ValueDFS, 16> OrderedUses; const auto &ValueInfo = getValueInfo(Op); @@ -625,15 +643,15 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { // we want to. bool PossibleCopy = VD.PInfo != nullptr; if (RenameStack.empty()) { - DEBUG(dbgs() << "Rename Stack is empty\n"); + LLVM_DEBUG(dbgs() << "Rename Stack is empty\n"); } else { - DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" - << RenameStack.back().DFSIn << "," - << RenameStack.back().DFSOut << ")\n"); + LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" + << RenameStack.back().DFSIn << "," + << RenameStack.back().DFSOut << ")\n"); } - DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," - << VD.DFSOut << ")\n"); + LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," + << VD.DFSOut << ")\n"); bool ShouldPush = (VD.Def || PossibleCopy); bool OutOfScope = !stackIsInScope(RenameStack, VD); @@ -652,7 +670,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { if (VD.Def || PossibleCopy) continue; if (!DebugCounter::shouldExecute(RenameCounter)) { - DEBUG(dbgs() << "Skipping execution due to debug counter\n"); + LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n"); continue; } ValueDFS &Result = RenameStack.back(); @@ -663,8 +681,9 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) { if (!Result.Def) Result.Def = materializeStack(Counter, RenameStack, Op); - DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " - << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n"); + LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " + << *VD.U->get() << " in " << *(VD.U->getUser()) + << "\n"); assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) && "Predicateinfo def should have dominated this use"); VD.U->set(Result.Def); @@ -702,7 +721,22 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, buildPredicateInfo(); } -PredicateInfo::~PredicateInfo() {} +// Remove all declarations we created. The PredicateInfo consumers are +// responsible for removing the ssa_copy calls created. +PredicateInfo::~PredicateInfo() { + // Collect function pointers in a set first, as SmallSet uses a SmallVector + // internally and we have to remove the asserting value handles first.
+ SmallPtrSet<Function *, 20> FunctionPtrs; + for (auto &F : CreatedDeclarations) + FunctionPtrs.insert(&*F); + CreatedDeclarations.clear(); + + for (Function *F : FunctionPtrs) { + assert(F->user_begin() == F->user_end() && + "PredicateInfo consumer did not remove all SSA copies."); + F->eraseFromParent(); + } +} void PredicateInfo::verifyPredicateInfo() const {} @@ -720,6 +754,20 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); } +// Replace ssa_copy calls created by PredicateInfo with their operand. +static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { + for (auto I = inst_begin(F), E = inst_end(F); I != E;) { + Instruction *Inst = &*I++; + const auto *PI = PredInfo.getPredicateInfoFor(Inst); + auto *II = dyn_cast<IntrinsicInst>(Inst); + if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) + continue; + + Inst->replaceAllUsesWith(II->getOperand(0)); + Inst->eraseFromParent(); + } +} + bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -727,6 +775,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); + + replaceCreatedSSACopys(*PredInfo, F); return false; } @@ -735,12 +785,14 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); OS << "PredicateInfo for function: " << F.getName() << "\n"; - make_unique<PredicateInfo>(F, DT, AC)->print(OS); + auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); + PredInfo->print(OS); + replaceCreatedSSACopys(*PredInfo, F); return PreservedAnalyses::all(); } -/// \brief An assembly annotator class to print PredicateInfo information in +/// An assembly annotator class to print PredicateInfo information in /// comments. class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { friend class PredicateInfo; diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index fcd3bd08482a..86e15bbd7f22 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -45,7 +46,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <cassert> @@ -164,26 +164,27 @@ struct AllocaInfo { } }; -// Data package used by RenamePass() -class RenamePassData { -public: +/// Data package used by RenamePass(). 
+struct RenamePassData { using ValVector = std::vector<Value *>; + using LocationVector = std::vector<DebugLoc>; - RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V) - : BB(B), Pred(P), Values(std::move(V)) {} + RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) + : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {} BasicBlock *BB; BasicBlock *Pred; ValVector Values; + LocationVector Locations; }; -/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// This assigns and keeps a per-bb relative ordering of load/store /// instructions in the block that directly load or store an alloca. /// /// This functionality is important because it avoids scanning large basic /// blocks multiple times when promoting many allocas in the same block. class LargeBlockInfo { - /// \brief For each instruction that we track, keep the index of the + /// For each instruction that we track, keep the index of the /// instruction. /// /// The index starts out as the number of the instruction from the start of @@ -242,7 +243,7 @@ struct PromoteMem2Reg { /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; - /// \brief The PhiNodes we're adding. + /// The PhiNodes we're adding. /// /// That map is used to simplify some Phi nodes as we iterate over it, so /// it should have deterministic iterators. We could use a MapVector, but @@ -294,7 +295,7 @@ private: unsigned getNumPreds(const BasicBlock *BB) { unsigned &NP = BBNumPreds[BB]; if (NP == 0) - NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; + NP = pred_size(BB) + 1; return NP - 1; } @@ -303,6 +304,7 @@ private: SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, + RenamePassData::LocationVector &IncLocs, std::vector<RenamePassData> &Worklist); bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; @@ -345,7 +347,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { } } -/// \brief Rewrite as many loads as possible given a single store. +/// Rewrite as many loads as possible given a single store. /// /// When there is only a single store, we can use the domtree to trivially /// replace all of the dominated loads with the stored value. Do so, and return @@ -475,7 +477,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Sort the stores by their index, making it efficient to do a lookup with a // binary search. - std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); + llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. @@ -509,6 +511,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); + // If the replacement value is the load, this must occur in unreachable + // code. 
+ if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); } @@ -631,10 +638,10 @@ void PromoteMem2Reg::run() { SmallVector<BasicBlock *, 32> PHIBlocks; IDF.calculate(PHIBlocks); if (PHIBlocks.size() > 1) - std::sort(PHIBlocks.begin(), PHIBlocks.end(), - [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.lookup(A) < BBNumbers.lookup(B); - }); + llvm::sort(PHIBlocks.begin(), PHIBlocks.end(), + [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); unsigned CurrentVersion = 0; for (BasicBlock *BB : PHIBlocks) @@ -653,15 +660,20 @@ void PromoteMem2Reg::run() { for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + // When handling debug info, treat all incoming values as if they have unknown + // locations until proven otherwise. + RenamePassData::LocationVector Locations(Allocas.size()); + // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), + std::move(Locations)); do { RenamePassData RPD = std::move(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. - RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); + RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList); } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. @@ -740,7 +752,7 @@ void PromoteMem2Reg::run() { // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. - std::sort(Preds.begin(), Preds.end()); + llvm::sort(Preds.begin(), Preds.end()); // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. @@ -772,7 +784,7 @@ void PromoteMem2Reg::run() { NewPhiNodes.clear(); } -/// \brief Determine which blocks the value is live in. +/// Determine which blocks the value is live in. /// /// These are blocks which lead to uses. Knowing this allows us to avoid /// inserting PHI nodes into blocks which don't lead to uses (thus, the @@ -846,7 +858,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks( } } -/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. +/// Queue a phi-node to be added to a basic-block for a specific Alloca. /// /// Returns true if there wasn't already a phi-node for that variable bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, @@ -868,13 +880,24 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, return true; } -/// \brief Recursively traverse the CFG of the function, renaming loads and +/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to +/// create a merged location incorporating \p DL, or to set \p DL directly. +static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, + bool ApplyMergedLoc) { + if (ApplyMergedLoc) + PN->applyMergedLocation(PN->getDebugLoc(), DL); + else + PN->setDebugLoc(DL); +} + +/// Recursively traverse the CFG of the function, renaming loads and /// stores to the allocas which we are promoting. 
/// /// IncomingVals indicates what value each Alloca contains on exit from the /// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, + RenamePassData::LocationVector &IncomingLocs, std::vector<RenamePassData> &Worklist) { NextIteration: // If we are inserting any phi nodes into this BB, they will already be in the @@ -899,6 +922,10 @@ NextIteration: do { unsigned AllocaNo = PhiToAllocaMap[APN]; + // Update the location of the phi node. + updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo], + APN->getNumIncomingValues() > 0); + // Add N incoming values to the PHI node. for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); @@ -960,8 +987,11 @@ NextIteration: continue; // what value were we writing? - IncomingVals[ai->second] = SI->getOperand(0); + unsigned AllocaNo = ai->second; + IncomingVals[AllocaNo] = SI->getOperand(0); + // Record debuginfo for the store before removing it. + IncomingLocs[AllocaNo] = SI->getDebugLoc(); for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second]) ConvertDebugDeclareToDebugValue(DII, SI, DIB); BB->getInstList().erase(SI); @@ -984,7 +1014,7 @@ NextIteration: for (; I != E; ++I) if (VisitedSuccs.insert(*I).second) - Worklist.emplace_back(*I, Pred, IncomingVals); + Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs); goto NextIteration; } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index b2231d68a301..ca184ed7c4e3 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -178,7 +178,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp new file mode 100644 index 000000000000..397bac2940a4 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -0,0 +1,191 @@ +//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdaterBulk class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +#define DEBUG_TYPE "ssaupdaterbulk" + +/// Helper function for finding a block which should have a value for the given +/// user. For PHI-nodes this block is the corresponding predecessor, for other +/// instructions it's their parent block. 
+static BasicBlock *getUserBB(Use *U) { + auto *User = cast<Instruction>(U->getUser()); + + if (auto *UserPN = dyn_cast<PHINode>(User)) + return UserPN->getIncomingBlock(*U); + else + return User->getParent(); +} + +/// Add a new variable to the SSA rewriter. This needs to be called before +/// AddAvailableValue or AddUse calls. +unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) { + unsigned Var = Rewrites.size(); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = " + << *Ty << ", Name = " << Name << "\n"); + RewriteInfo RI(Name, Ty); + Rewrites.push_back(RI); + return Var; +} + +/// Indicate that a rewritten value is available in the specified block with the +/// specified value. +void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) { + assert(Var < Rewrites.size() && "Variable not found!"); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var + << ": added new available value " << *V << " in " + << BB->getName() << "\n"); + Rewrites[Var].Defines[BB] = V; +} + +/// Record a use of the symbolic value. This use will be updated with a +/// rewritten value when RewriteAllUses is called. +void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) { + assert(Var < Rewrites.size() && "Variable not found!"); + LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use " << *U->get() + << " in " << getUserBB(U)->getName() << "\n"); + Rewrites[Var].Uses.push_back(U); +} + +/// Return true if the SSAUpdater already has a value for the specified variable +/// in the specified block. +bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) { + return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false; +} + +// Compute value at the given block BB. We either should already know it, or we +// should be able to recursively reach it going up the dominator tree. +Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R, + DominatorTree *DT) { + if (!R.Defines.count(BB)) { + if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) { + BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock(); + Value *V = computeValueAt(IDom, R, DT); + R.Defines[BB] = V; + } else + R.Defines[BB] = UndefValue::get(R.Ty); + } + return R.Defines[BB]; +} + +/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks. +/// This is basically a subgraph limited by DefBlocks and UsingBlocks. +static void +ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks, + const SmallPtrSetImpl<BasicBlock *> &DefBlocks, + SmallPtrSetImpl<BasicBlock *> &LiveInBlocks, + PredIteratorCache &PredCache) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(), + UsingBlocks.end()); + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region where the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it too. Add the preds to the worklist unless they are a + // defining block.
+ for (BasicBlock *P : PredCache.get(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +/// Perform all the necessary updates, including insertion of new PHI-nodes +/// and updating of the requested uses. +void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, + SmallVectorImpl<PHINode *> *InsertedPHIs) { + for (auto &R : Rewrites) { + // Compute locations for new phi-nodes. + // For that we need to initialize DefBlocks from definitions in R.Defines, + // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use + // this set for computing iterated dominance frontier (IDF). + // The IDF blocks are the blocks where we need to insert new phi-nodes. + ForwardIDFCalculator IDF(*DT); + LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size() + << " use(s)\n"); + + SmallPtrSet<BasicBlock *, 2> DefBlocks; + for (auto &Def : R.Defines) + DefBlocks.insert(Def.first); + IDF.setDefiningBlocks(DefBlocks); + + SmallPtrSet<BasicBlock *, 2> UsingBlocks; + for (Use *U : R.Uses) + UsingBlocks.insert(getUserBB(U)); + + SmallVector<BasicBlock *, 32> IDFBlocks; + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache); + IDF.resetLiveInBlocks(); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.calculate(IDFBlocks); + + // We've computed IDF, now insert new phi-nodes there. + SmallVector<PHINode *, 4> InsertedPHIsForVar; + for (auto *FrontierBB : IDFBlocks) { + IRBuilder<> B(FrontierBB, FrontierBB->begin()); + PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); + R.Defines[FrontierBB] = PN; + InsertedPHIsForVar.push_back(PN); + if (InsertedPHIs) + InsertedPHIs->push_back(PN); + } + + // Fill in arguments of the inserted PHIs. + for (auto *PN : InsertedPHIsForVar) { + BasicBlock *PBB = PN->getParent(); + for (BasicBlock *Pred : PredCache.get(PBB)) + PN->addIncoming(computeValueAt(Pred, R, DT), Pred); + } + + // Rewrite actual uses with the inserted definitions. + SmallPtrSet<Use *, 4> ProcessedUses; + for (Use *U : R.Uses) { + if (!ProcessedUses.insert(U).second) + continue; + Value *V = computeValueAt(getUserBB(U), R, DT); + Value *OldVal = U->get(); + assert(OldVal && "Invalid use!"); + // Notify the users of the existing value that it is being replaced.
+ if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V + << "\n"); + U->set(V); + } + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7c195788e416..c87b5c16ffce 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -28,6 +27,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -66,7 +66,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -688,9 +687,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. - if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), - pred_end(SI->getParent())) <= - 128) + if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128) CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) @@ -847,9 +844,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( // Remove PHI node entries for the dead edge. ThisCases[0].Dest->removePredecessor(TI->getParent()); - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI - << "\n"); + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -861,8 +858,8 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].Value); - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI); + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); // Collect branch weights into a vector. 
SmallVector<uint32_t, 8> Weights; @@ -888,7 +885,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (HasWeight && Weights.size() >= 2) setBranchWeights(SI, Weights); - DEBUG(dbgs() << "Leaving: " << *TI << "\n"); + LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } @@ -929,9 +926,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( Instruction *NI = Builder.CreateBr(TheRealDest); (void)NI; - DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI - << "\n"); + LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI + << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -1290,31 +1287,44 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; - // For a normal instruction, we just move one to right before the branch, - // then replace all uses of the other with the first. Finally, we remove - // the now redundant second instruction. - BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, - LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_mem_parallel_loop_access}; - combineMetadata(I1, I2, KnownIDs); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); - Changed = true; + if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { + assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2)); + // The debug location is an integral part of a debug info intrinsic + // and can't be separated from it or replaced. Instead of attempting + // to merge locations, simply hoist both copies of the intrinsic. + BIParent->getInstList().splice(BI->getIterator(), + BB1->getInstList(), I1); + BIParent->getInstList().splice(BI->getIterator(), + BB2->getInstList(), I2); + Changed = true; + } else { + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. + BIParent->getInstList().splice(BI->getIterator(), + BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access}; + combineMetadata(I1, I2, KnownIDs); + + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. 
+      I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+      I2->eraseFromParent();
+      Changed = true;
+    }
 
     I1 = &*BB1_Itr++;
     I2 = &*BB2_Itr++;
@@ -1728,7 +1738,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
 
   LockstepReverseIterator LRI(UnconditionalPreds);
   while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) {
-    DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
+    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
+                      << "\n");
     InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
     ++ScanIdx;
     --LRI;
@@ -1740,7 +1751,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
     for (auto *V : PHIOperands[I])
       if (InstructionsToSink.count(V) == 0)
         ++NumPHIdValues;
-    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+    LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
     unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
     if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
       NumPHIInsts++;
@@ -1768,7 +1779,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
   if (!Profitable)
     return false;
 
-  DEBUG(dbgs() << "SINK: Splitting edge\n");
+  LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
   // We have a conditional edge and we're going to sink some instructions.
   // Insert a new block postdominating all blocks we're going to sink from.
   if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
@@ -1790,16 +1801,17 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
   // and never actually sink it which means we produce more PHIs than intended.
   // This is unlikely in practice though.
   for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
-    DEBUG(dbgs() << "SINK: Sink: "
-                 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
-                 << "\n");
+    LLVM_DEBUG(dbgs() << "SINK: Sink: "
+                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
+                      << "\n");
 
     // Because we've sunk every instruction in turn, the current instruction to
     // sink is always at index 0.
     LRI.reset();
     if (!ProfitableToSinkInstruction(LRI)) {
       // Too many PHIs would be created.
-      DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+      LLVM_DEBUG(
+          dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
       break;
     }
@@ -1811,7 +1823,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
   return Changed;
 }
 
-/// \brief Determine if we can hoist sink a sole store instruction out of a
+/// Determine if we can hoist or sink a sole store instruction out of a
 /// conditional block.
 ///
 /// We are looking for code like the following:
@@ -1851,12 +1863,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
 
   // Look for a store to the same pointer in BrBB.
   unsigned MaxNumInstToLookAt = 9;
-  for (Instruction &CurI : reverse(*BrBB)) {
+  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
     if (!MaxNumInstToLookAt)
       break;
-    // Skip debug info.
-    if (isa<DbgInfoIntrinsic>(CurI))
-      continue;
     --MaxNumInstToLookAt;
 
     // Could be calling an instruction that affects memory like free().
@@ -1875,7 +1884,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
   return nullptr;
 }
 
-/// \brief Speculate a conditional basic block flattening the CFG.
+/// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
 /// instructions like this is most often not desirable.
Instead, there is an MI @@ -2045,7 +2054,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; // If we get here, we can hoist the instruction and if-convert. - DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); // Insert a select of the value of the speculated store. if (SpeculatedStoreValue) { @@ -2106,19 +2115,16 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, /// Return true if we can thread a branch across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { - BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { - if (isa<DbgInfoIntrinsic>(BBI)) - continue; + for (Instruction &I : BB->instructionsWithoutDebug()) { if (Size > 10) return false; // Don't clone large BB's. ++Size; // We can only support instructions that do not define values that are // live outside of the current basic block. - for (User *U : BBI->users()) { + for (User *U : I.users()) { Instruction *UI = cast<Instruction>(U); if (UI->getParent() != BB || isa<PHINode>(UI)) return false; @@ -2260,6 +2266,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // dependence information for this check, but simplifycfg can't keep it up // to date, and this catches most of the cases we care about anyway. BasicBlock *BB = PN->getParent(); + const Function *Fn = BB->getParent(); + if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) + return false; + BasicBlock *IfTrue, *IfFalse; Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); if (!IfCond || @@ -2350,8 +2360,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } } - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " - << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); + LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond + << " T: " << IfTrue->getName() + << " F: " << IfFalse->getName() << "\n"); // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. @@ -2475,9 +2486,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, (void)RI; - DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" - << "\n " << *BI << "NewRet = " << *RI - << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); + LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " + << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); EraseTerminatorInstAndDCECond(BI); @@ -2486,7 +2497,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, /// Return true if the given instruction is available /// in its predecessor block. If yes, the instruction will be removed. 
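// A minimal sketch of the CSE performed by tryCSEWithPredecessor below
// (values are illustrative, not from the source); only binary operators
// and compares are considered:
//
//   PB: %s = add i32 %a, %b   ; already available in the predecessor
//   BB: %t = add i32 %a, %b   ; identical, so uses of %t are rewritten
//                             ; to %s and %t is erased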
-static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { +static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) { if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst)) return false; for (Instruction &I : *PB) { @@ -2543,14 +2554,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { if (PBI->isConditional() && (BI->getSuccessor(0) == PBI->getSuccessor(0) || BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + for (auto I = BB->instructionsWithoutDebug().begin(), + E = BB->instructionsWithoutDebug().end(); + I != E;) { Instruction *Curr = &*I++; if (isa<CmpInst>(Curr)) { Cond = Curr; break; } // Quit if we can't remove this instruction. - if (!checkCSEInPredecessor(Curr, PB)) + if (!tryCSEWithPredecessor(Curr, PB)) return false; } } @@ -2650,7 +2663,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { continue; } - DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. @@ -2860,7 +2873,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, if (!AlternativeV) break; - assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2); + assert(pred_size(Succ) == 2); auto PredI = pred_begin(Succ); BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) @@ -2903,14 +2916,13 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to // thread this store. unsigned N = 0; - for (auto &I : *BB) { + for (auto &I : BB->instructionsWithoutDebug()) { // Cheap instructions viable for folding. if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I)) ++N; // Free instructions. - else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || - IsaBitcastOfPointerType(I)) + else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I)) continue; else return false; @@ -2965,6 +2977,21 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, if (&*I != PStore && I->mayReadOrWriteMemory()) return false; + // If PostBB has more than two predecessors, we need to split it so we can + // sink the store. + if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { + // We know that QFB's only successor is PostBB. And QFB has a single + // predecessor. If QTB exists, then its only successor is also PostBB. + // If QTB does not exist, then QFB's only predecessor has a conditional + // branch to QFB and PostBB. + BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); + BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, + "condstore.split"); + if (!NewBB) + return false; + PostBB = NewBB; + } + // OK, we're going to sink the stores to PostBB. The store has to be // conditional though, so first create the predicate. 
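// A rough sketch of the overall merge, in source terms (illustrative, not
// from the source): two diamonds/triangles that each conditionally store
// to the same address,
//
//   if (p) *a = x;   // PTB/PFB
//   if (q) *a = y;   // QTB/QFB
//
// are rewritten so that a single conditional store sits in PostBB, guarded
// by the combined predicate built below, with selects choosing the value.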
Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) @@ -3100,7 +3127,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; - if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) + if (!QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. @@ -3200,11 +3227,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. - BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(BBI)) - ++BBI; - if (&*BBI != BI) + if (&*BB->instructionsWithoutDebug().begin() != BI) return false; int PBIOp, BIOp; @@ -3261,8 +3286,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() - << "AND: " << *BI->getParent()); + LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other @@ -3280,7 +3305,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, OtherDest = InfLoopBlock; } - DEBUG(dbgs() << *PBI->getParent()->getParent()); + LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. @@ -3364,8 +3389,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, } } - DEBUG(dbgs() << "INTO: " << *PBI->getParent()); - DEBUG(dbgs() << *PBI->getParent()->getParent()); + LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. @@ -3665,9 +3690,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, BasicBlock *BB = BI->getParent(); - DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() - << " cases into SWITCH. BB is:\n" - << *BB); + LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() + << " cases into SWITCH. BB is:\n" + << *BB); // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block @@ -3690,8 +3715,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase - << "\nEXTRABB = " << *BB); + LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase + << "\nEXTRABB = " << *BB); BB = NewBB; } @@ -3722,7 +3747,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // Erase the old branch instruction. 
EraseTerminatorInstAndDCECond(BI); - DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); + LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -3873,6 +3898,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) { switch (IntrinsicID) { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::dbg_label: case Intrinsic::lifetime_end: break; default: @@ -4049,8 +4075,8 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DEBUG(dbgs() << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred); + LLVM_DEBUG(dbgs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } @@ -4374,7 +4400,8 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); - DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal + << " is dead.\n"); } } @@ -4390,7 +4417,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); SI->setDefaultDest(&*NewDefault); @@ -4607,24 +4634,20 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, // which we can constant-propagate the CaseVal, continue to its successor. SmallDenseMap<Value *, Constant *> ConstantPool; ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); - for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; - ++I) { - if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + for (Instruction &I :CaseDest->instructionsWithoutDebug()) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) { // If the terminator is a simple branch, continue to the next block. if (T->getNumSuccessors() != 1 || T->isExceptional()) return false; Pred = CaseDest; CaseDest = T->getSuccessor(0); - } else if (isa<DbgInfoIntrinsic>(I)) { - // Skip debug intrinsic. - continue; - } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) { + } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) { // Instruction is side-effect free and constant. // If the instruction has uses outside this block or a phi node slot for // the block, it is not safe to bypass the instruction since it would then // no longer dominate all its uses. - for (auto &Use : I->uses()) { + for (auto &Use : I.uses()) { User *User = Use.getUser(); if (Instruction *I = dyn_cast<Instruction>(User)) if (I->getParent() == CaseDest) @@ -4635,7 +4658,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, return false; } - ConstantPool.insert(std::make_pair(&*I, C)); + ConstantPool.insert(std::make_pair(&I, C)); } else { break; } @@ -4670,30 +4693,31 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, } // Helper function used to add CaseVal to the list of cases that generate -// Result. 
-static void MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { +// Result. Returns the updated number of cases that generate this result. +static uintptr_t MapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); - return; + return I.second.size(); } } UniqueResults.push_back( std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); + return 1; } // Helper function that initializes a map containing // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, - BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult, - const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool +InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, const DataLayout &DL, + const TargetTransformInfo &TTI, + uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); @@ -4703,10 +4727,21 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, DL, TTI)) return false; - // Only one value per case is permitted + // Only one value per case is permitted. if (Results.size() > 1) return false; - MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Add the case->result mapping to UniqueResults. + const uintptr_t NumCasesForResult = + MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Early out if there are too many cases for this result. + if (NumCasesForResult > MaxCasesPerResult) + return false; + + // Early out if there are too many unique results. + if (UniqueResults.size() > MaxUniqueResults) + return false; // Check the PHI consistency. if (!PHI) @@ -4806,7 +4841,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, - DL, TTI)) + DL, TTI, 2, 1)) return false; // Selects choose between maximum two values. if (UniqueResults.size() != 2) @@ -5384,8 +5419,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } bool ReturnedEarly = false; - for (size_t I = 0, E = PHIs.size(); I != E; ++I) { - PHINode *PHI = PHIs[I]; + for (PHINode *PHI : PHIs) { const ResultListTy &ResultList = ResultLists[PHI]; // If using a bitmask, use any value to fill the lookup table holes. @@ -5475,7 +5509,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, SmallVector<int64_t,4> Values; for (auto &C : SI->cases()) Values.push_back(C.getCaseValue()->getValue().getSExtValue()); - std::sort(Values.begin(), Values.end()); + llvm::sort(Values.begin(), Values.end()); // If the switch is already dense, there's nothing useful to do here. if (isSwitchDense(Values)) @@ -5558,11 +5592,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If the block only contains the switch, see if we can fold the block // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. 
-  while (isa<DbgInfoIntrinsic>(BBI))
-    ++BBI;
-  if (SI == &*BBI)
+  if (SI == &*BB->instructionsWithoutDebug().begin())
     if (FoldValueComparisonIntoPredecessors(SI, Builder))
       return simplifyCFG(BB, TTI, Options) | true;
 }
@@ -5649,7 +5679,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
 /// any transform which might inhibit optimization (such as our ability to
 /// specialize a particular handler via tail commoning). We do this by not
 /// merging any blocks which require us to introduce a phi. Since the same
-/// values are flowing through both blocks, we don't loose any ability to
+/// values are flowing through both blocks, we don't lose any ability to
 /// specialize. If anything, we make such specialization more likely.
 ///
 /// TODO - This transformation could remove entries from a phi in the target
@@ -5679,7 +5709,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
 
   // We've found an identical block. Update our predecessors to take that
   // path instead and make ourselves dead.
-  SmallSet<BasicBlock *, 16> Preds;
+  SmallPtrSet<BasicBlock *, 16> Preds;
   Preds.insert(pred_begin(BB), pred_end(BB));
   for (BasicBlock *Pred : Preds) {
     InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
@@ -5697,7 +5727,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
     Inst.eraseFromParent();
   }
 
-  SmallSet<BasicBlock *, 16> Succs;
+  SmallPtrSet<BasicBlock *, 16> Succs;
   Succs.insert(succ_begin(BB), succ_end(BB));
   for (BasicBlock *Succ : Succs) {
     Succ->removePredecessor(BB);
@@ -5721,9 +5751,12 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
   // header. (This is for early invocations before loop simplify and
   // vectorization to keep canonical loop forms for nested loops. These blocks
   // can be eliminated when the pass is invoked later in the back-end.)
+  // Note that if BB has only one predecessor then we do not introduce a new
+  // backedge, so we can eliminate BB.
   bool NeedCanonicalLoop =
       Options.NeedCanonicalLoop &&
-      (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
+      (LoopHeaders && pred_size(BB) > 1 &&
+       (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
   BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
       !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
@@ -5771,6 +5804,9 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
 
 bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   BasicBlock *BB = BI->getParent();
+  const Function *Fn = BB->getParent();
+  if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing))
+    return false;
 
   // Conditional branch
   if (isValueEqualityComparison(BI)) {
@@ -5783,18 +5819,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
 
     // This block must be empty, except for the setcond inst, if it exists.
     // Ignore dbg intrinsics.
-    BasicBlock::iterator I = BB->begin();
-    // Ignore dbg intrinsics.
-    while (isa<DbgInfoIntrinsic>(I))
-      ++I;
+    auto I = BB->instructionsWithoutDebug().begin();
     if (&*I == BI) {
       if (FoldValueComparisonIntoPredecessors(BI, Builder))
         return simplifyCFG(BB, TTI, Options) | true;
     } else if (&*I == cast<Instruction>(BI->getCondition())) {
       ++I;
-      // Ignore dbg intrinsics.
-    while (isa<DbgInfoIntrinsic>(I))
-      ++I;
       if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
         return simplifyCFG(BB, TTI, Options) | true;
     }
@@ -5920,17 +5950,20 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
 
     // Load from null is undefined.
     if (LoadInst *LI = dyn_cast<LoadInst>(Use))
       if (!LI->isVolatile())
-        return LI->getPointerAddressSpace() == 0;
+        return !NullPointerIsDefined(LI->getFunction(),
+                                     LI->getPointerAddressSpace());
 
     // Store to null is undefined.
     if (StoreInst *SI = dyn_cast<StoreInst>(Use))
       if (!SI->isVolatile())
-        return SI->getPointerAddressSpace() == 0 &&
+        return (!NullPointerIsDefined(SI->getFunction(),
+                                      SI->getPointerAddressSpace())) &&
                SI->getPointerOperand() == I;
 
     // A call to null is undefined.
     if (auto CS = CallSite(Use))
-      return CS.getCalledValue() == I;
+      return !NullPointerIsDefined(CS->getFunction()) &&
+             CS.getCalledValue() == I;
   }
   return false;
 }
@@ -5971,7 +6004,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
   // or that just have themselves as a predecessor. These are unreachable.
   if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
       BB->getSinglePredecessor() == BB) {
-    DEBUG(dbgs() << "Removing BB: \n" << *BB);
+    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
     DeleteDeadBlock(BB);
     return true;
   }
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index ad1faea0a7ae..e381fbc34ab4 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 
@@ -80,6 +81,7 @@ namespace {
     bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
     bool eliminateOverflowIntrinsic(CallInst *CI);
+    bool eliminateTrunc(TruncInst *TI);
     bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
     void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
@@ -147,8 +149,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
   if (SE->getSCEV(UseInst) != FoldedExpr)
     return nullptr;
 
-  DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
-        << " -> " << *UseInst << '\n');
+  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+                    << " -> " << *UseInst << '\n');
 
   UseInst->setOperand(OperIdx, IVSrc);
   assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
@@ -221,7 +223,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
     // for now.
return false; - DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); ICmp->setPredicate(InvariantPredicate); ICmp->setOperand(0, NewLHS); ICmp->setOperand(1, NewRHS); @@ -252,11 +254,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { if (SE->isKnownPredicate(Pred, S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); DeadInsts.emplace_back(ICmp); - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); DeadInsts.emplace_back(ICmp); - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { // fallthrough to end of function } else if (ICmpInst::isSigned(OriginalPred) && @@ -267,7 +269,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { // we turn the instruction's predicate to its unsigned version. Note that // we cannot rely on Pred here unless we check if we have swapped it. assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); - DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp + << '\n'); ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); } else return; @@ -293,7 +296,7 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { SDiv->getName() + ".udiv", SDiv); UDiv->setIsExact(SDiv->isExact()); SDiv->replaceAllUsesWith(UDiv); - DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); ++NumSimplifiedSDiv; Changed = true; DeadInsts.push_back(SDiv); @@ -309,7 +312,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, Rem->getName() + ".urem", Rem); Rem->replaceAllUsesWith(URem); - DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); + LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); ++NumSimplifiedSRem; Changed = true; DeadInsts.emplace_back(Rem); @@ -318,7 +321,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { // i % n --> i if i is in [0,n). 
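// For illustration (a sketch, assuming SCEV has proved 0 <= i < n for the
// IV expression):
//
//   for (int i = 0; i < n; ++i)
//     sum += a[i % n];   // i % n == i here, so the rem is replaced by its
//                        // numerator and the instruction goes dead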
 void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
   Rem->replaceAllUsesWith(Rem->getOperand(0));
-  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
   Changed = true;
   DeadInsts.emplace_back(Rem);
@@ -332,7 +335,7 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
   SelectInst *Sel =
       SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
   Rem->replaceAllUsesWith(Sel);
-  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
   Changed = true;
   DeadInsts.emplace_back(Rem);
@@ -492,6 +495,118 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
   return true;
 }
 
+bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
+  // It is always legal to replace
+  //   icmp <pred> i32 trunc(iv), n
+  // with
+  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
+  // Or with
+  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
+  // Or with either of these if pred is an equality predicate.
+  //
+  // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
+  // every comparison which uses trunc, it means that we can replace each of
+  // them with comparison of iv against sext/zext(n). We no longer need trunc
+  // after that.
+  //
+  // TODO: Should we do this if we can widen *some* comparisons, but not all
+  // of them? Sometimes it is enough to enable other optimizations, but the
+  // trunc instruction will stay in the loop.
+  Value *IV = TI->getOperand(0);
+  Type *IVTy = IV->getType();
+  const SCEV *IVSCEV = SE->getSCEV(IV);
+  const SCEV *TISCEV = SE->getSCEV(TI);
+
+  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
+  // get rid of trunc.
+  bool DoesSExtCollapse = false;
+  bool DoesZExtCollapse = false;
+  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
+    DoesSExtCollapse = true;
+  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
+    DoesZExtCollapse = true;
+
+  // If neither sext nor zext collapses, it is not profitable to do any
+  // transform. Bail.
+  if (!DoesSExtCollapse && !DoesZExtCollapse)
+    return false;
+
+  // Collect users of the trunc that look like comparisons against invariants.
+  // Bail if we find something different.
+  SmallVector<ICmpInst *, 4> ICmpUsers;
+  for (auto *U : TI->users()) {
+    // We don't care about users in unreachable blocks.
+    if (isa<Instruction>(U) &&
+        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
+      continue;
+    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+      if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) {
+        assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+        // If we cannot get rid of trunc, bail.
+        if (ICI->isSigned() && !DoesSExtCollapse)
+          return false;
+        if (ICI->isUnsigned() && !DoesZExtCollapse)
+          return false;
+        // For equality, either signed or unsigned works.
+        ICmpUsers.push_back(ICI);
+      } else
+        return false;
+    } else
+      return false;
+  }
+
+  auto CanUseZExt = [&](ICmpInst *ICI) {
+    // Unsigned comparison can be widened as unsigned.
+    if (ICI->isUnsigned())
+      return true;
+    // Is it profitable to do zext?
+    if (!DoesZExtCollapse)
+      return false;
+    // For equality, we can safely zext both parts.
+    if (ICI->isEquality())
+      return true;
+    // Otherwise we can only use zext when comparing two non-negative or two
+    // negative values. But in practice, we will never pass the DoesZExtCollapse
+    // check for a negative value, because zext(trunc(x)) is non-negative. So
+    // it only makes sense to check for non-negativity here.
+    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
+    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
+    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
+  };
+  // Replace all comparisons against trunc with comparisons against IV.
+  for (auto *ICI : ICmpUsers) {
+    auto *Op1 = ICI->getOperand(1);
+    Instruction *Ext = nullptr;
+    // For a signed/unsigned predicate, replace the old comparison with a
+    // comparison of the IV against sext/zext of the invariant argument. If
+    // we can use either sext or zext (i.e. we are dealing with an equality
+    // predicate), then prefer zext as a more canonical form.
+    // TODO: If we see a signed comparison which can be turned into unsigned,
+    // we can do it here for canonicalization purposes.
+    ICmpInst::Predicate Pred = ICI->getPredicate();
+    if (CanUseZExt(ICI)) {
+      assert(DoesZExtCollapse && "Unprofitable zext?");
+      Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
+      Pred = ICmpInst::getUnsignedPredicate(Pred);
+    } else {
+      assert(DoesSExtCollapse && "Unprofitable sext?");
+      Ext = new SExtInst(Op1, IVTy, "sext", ICI);
+      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
+    }
+    bool Changed;
+    L->makeLoopInvariant(Ext, Changed);
+    (void)Changed;
+    ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext);
+    ICI->replaceAllUsesWith(NewICI);
+    DeadInsts.emplace_back(ICI);
+  }
+
+  // Trunc no longer needed.
+  TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+  DeadInsts.emplace_back(TI);
+  return true;
+}
+
 /// Eliminate an operation that consumes a simple IV and has no observable
 /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
 /// but UseInst may not be.
@@ -516,6 +631,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
   if (eliminateOverflowIntrinsic(CI))
     return true;
 
+  if (auto *TI = dyn_cast<TruncInst>(UseInst))
+    if (eliminateTrunc(TI))
+      return true;
+
   if (eliminateIdentitySCEV(UseInst, IVOperand))
     return true;
 
@@ -548,8 +667,8 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
   auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
 
   I->replaceAllUsesWith(Invariant);
-  DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
-        << " with loop invariant: " << *S << '\n');
+  LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
+                    << " with loop invariant: " << *S << '\n');
   ++NumFoldedUser;
   Changed = true;
   DeadInsts.emplace_back(I);
@@ -589,7 +708,7 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
   if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
     return false;
 
-  DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
 
   UseInst->replaceAllUsesWith(IVOperand);
   ++NumElimIdentity;
@@ -771,6 +890,15 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
     SimpleIVUsers.pop_back_val();
     Instruction *UseInst = UseOper.first;
 
+    // If a user of the IndVar is trivially dead, we prefer just to mark it dead
+    // rather than try to do some complex analysis or transformation (such as
+    // widening) based on it.
+    // TODO: Propagate TLI and pass it here to handle more cases.
+    if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
+      DeadInsts.emplace_back(UseInst);
+      continue;
+    }
+
    // Bypass back edges to avoid extra work.
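// To illustrate eliminateTrunc above (a sketch; %iv is an i64 IV for which
// sign-extension collapses, i.e. sext(trunc(%iv)) == %iv):
//
//   %t = trunc i64 %iv to i32
//   %c = icmp slt i32 %t, %n         ; narrow compare against invariant %n
//
// becomes
//
//   %n.ext = sext i32 %n to i64      ; made loop-invariant
//   %c = icmp slt i64 %iv, %n.ext    ; %t goes dead and is removed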
if (UseInst == CurrIV) continue; @@ -783,7 +911,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { for (unsigned N = 0; IVOperand; ++N) { assert(N <= Simplified.size() && "runaway iteration"); - Value *NewOper = foldIVUser(UseOper.first, IVOperand); + Value *NewOper = foldIVUser(UseInst, IVOperand); if (!NewOper) break; // done folding IVOperand = dyn_cast<Instruction>(NewOper); @@ -791,12 +919,12 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { if (!IVOperand) continue; - if (eliminateIVUser(UseOper.first, IVOperand)) { + if (eliminateIVUser(UseInst, IVOperand)) { pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) { if ((isa<OverflowingBinaryOperator>(BO) && strengthenOverflowingOperation(BO, IVOperand)) || (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { @@ -806,13 +934,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } } - CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + CastInst *Cast = dyn_cast<CastInst>(UseInst); if (V && Cast) { V->visitCast(Cast); continue; } - if (isSimpleIVUser(UseOper.first, L, SE)) { - pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); + if (isSimpleIVUser(UseInst, L, SE)) { + pushIVUsers(UseInst, L, Simplified, SimpleIVUsers); } } } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp deleted file mode 100644 index f3d4f2ef38d7..000000000000 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility pass used for testing the InstructionSimplify analysis. -// The analysis is applied to every instruction, and if it simplifies then the -// instruction is replaced by the simplification. If you are looking for a pass -// that performs serious instruction folding, use the instcombine pass instead. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SimplifyInstructions.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -using namespace llvm; - -#define DEBUG_TYPE "instsimplify" - -STATISTIC(NumSimplified, "Number of redundant instructions removed"); - -static bool runImpl(Function &F, const SimplifyQuery &SQ, - OptimizationRemarkEmitter *ORE) { - SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; - bool Changed = false; - - do { - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - // Here be subtlety: the iterator must be incremented before the loop - // body (not sure why), so a range-for loop won't work here. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = &*BI++; - // The first time through the loop ToSimplify is empty and we try to - // simplify all instructions. On later iterations ToSimplify is not - // empty and we only bother simplifying instructions that are in it. - if (!ToSimplify->empty() && !ToSimplify->count(I)) - continue; - - // Don't waste time simplifying unused instructions. - if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, SQ, ORE)) { - // Mark all uses for resimplification next time round the loop. - for (User *U : I->users()) - Next->insert(cast<Instruction>(U)); - I->replaceAllUsesWith(V); - ++NumSimplified; - Changed = true; - } - } - if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { - // RecursivelyDeleteTriviallyDeadInstruction can remove more than one - // instruction, so simply incrementing the iterator does not work. - // When instructions get deleted re-iterate instead. - BI = BB->begin(); - BE = BB->end(); - Changed = true; - } - } - } - - // Place the list of instructions to simplify on the next loop iteration - // into ToSimplify. - std::swap(ToSimplify, Next); - Next->clear(); - } while (!ToSimplify->empty()); - - return Changed; -} - -namespace { - struct InstSimplifier : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - InstSimplifier() : FunctionPass(ID) { - initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); - } - - /// runOnFunction - Remove instructions that simplify. 
-    bool runOnFunction(Function &F) override {
-      if (skipFunction(F))
-        return false;
-
-      const DominatorTree *DT =
-          &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-      const TargetLibraryInfo *TLI =
-          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-      AssumptionCache *AC =
-          &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-      OptimizationRemarkEmitter *ORE =
-          &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-      const DataLayout &DL = F.getParent()->getDataLayout();
-      const SimplifyQuery SQ(DL, TLI, DT, AC);
-      return runImpl(F, SQ, ORE);
-    }
-  };
-}
-
-char InstSimplifier::ID = 0;
-INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
-                      "Remove redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
-                    "Remove redundant instructions", false, false)
-char &llvm::InstructionSimplifierID = InstSimplifier::ID;
-
-// Public interface to the simplify instructions pass.
-FunctionPass *llvm::createInstructionSimplifierPass() {
-  return new InstSimplifier();
-}
-
-PreservedAnalyses InstSimplifierPass::run(Function &F,
-                                          FunctionAnalysisManager &AM) {
-  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
-  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-  auto &AC = AM.getResult<AssumptionAnalysis>(F);
-  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
-  bool Changed = runImpl(F, SQ, &ORE);
-  if (!Changed)
-    return PreservedAnalyses::all();
-
-  PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
-  return PA;
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 03a1d55ddc30..8c48597fc2e4 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -7,10 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This is a utility pass used for testing the InstructionSimplify analysis.
-// The analysis is applied to every instruction, and if it simplifies then the
-// instruction is replaced by the simplification. If you are looking for a pass
-// that performs serious instruction folding, use the instcombine pass instead.
+// This file implements the library calls simplifier. It does not implement
+// any pass, but can be used by other passes to do simplifications.
// //===----------------------------------------------------------------------===// @@ -21,7 +19,9 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -33,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -104,19 +103,51 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { }); } -/// \brief Check whether the overloaded unary floating point function -/// corresponding to \a Ty is available. -static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - switch (Ty->getTypeID()) { - case Type::FloatTyID: - return TLI->has(FloatFn); - case Type::DoubleTyID: - return TLI->has(DoubleFn); - default: - return TLI->has(LongDoubleFn); - } +static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { + if (Base < 2 || Base > 36) + // handle special zero base + if (Base != 0) + return nullptr; + + char *End; + std::string nptr = Str.str(); + errno = 0; + long long int Result = strtoll(nptr.c_str(), &End, Base); + if (errno) + return nullptr; + + // if we assume all possible target locales are ASCII supersets, + // then if strtoll successfully parses a number on the host, + // it will also successfully parse the same way on the target + if (*End != '\0') + return nullptr; + + if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result)) + return nullptr; + + return ConstantInt::get(CI->getType(), Result); +} + +static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + CallInst *FOpen = dyn_cast<CallInst>(File); + if (!FOpen) + return false; + + Function *InnerCallee = FOpen->getCalledFunction(); + if (!InnerCallee) + return false; + + LibFunc Func; + if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) || + Func != LibFunc_fopen) + return false; + + inferLibFuncAttributes(*CI->getCalledFunction(), *TLI); + if (PointerMayBeCaptured(File, true, true)) + return false; + + return true; } //===----------------------------------------------------------------------===// @@ -156,9 +187,8 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), - 1); + B.CreateMemCpy(CpyDst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); return Dst; } @@ -346,8 +376,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. 
- B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); + B.CreateMemCpy(Dst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); return Dst; } @@ -371,7 +401,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, Src, LenV, 1); + B.CreateMemCpy(Dst, 1, Src, 1, LenV); return DstEnd; } @@ -388,7 +418,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { --SrcLen; if (SrcLen == 0) { - // strncpy(x, "", y) -> memset(x, '\0', y, 1) + // strncpy(x, "", y) -> memset(align 1 x, '\0', y) B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); return Dst; } @@ -407,8 +437,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { return nullptr; Type *PT = Callee->getFunctionType()->getParamType(0); - // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); + // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] + B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); return Dst; } @@ -508,7 +538,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { - Module &M = *CI->getParent()->getParent()->getParent(); + Module &M = *CI->getModule(); unsigned WCharSize = TLI->getWCharSize(M) * 8; // We cannot perform this optimization without wchar_size metadata. if (WCharSize == 0) @@ -816,40 +846,19 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { - // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { - // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) + B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } -// TODO: Does this belong in BuildLibCalls or should all of those similar -// functions be moved here? -static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, - IRBuilder<> &B, const TargetLibraryInfo &TLI) { - LibFunc Func; - if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func)) - return nullptr; - - Module *M = B.GetInsertBlock()->getModule(); - const DataLayout &DL = M->getDataLayout(); - IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); - Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), - PtrType, PtrType); - CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc"); - - if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n). 
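// In source terms (a sketch):
//
//   void *p = malloc(n);
//   memset(p, 0, n);       // zeroes the whole allocation
//
// is folded into the single call
//
//   void *p = calloc(1, n);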
static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, const TargetLibraryInfo &TLI) { @@ -901,12 +910,19 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { if (auto *Calloc = foldMallocMemset(CI, B, *TLI)) return Calloc; - // memset(p, v, n) -> llvm.memset(p, v, n, 1) + // memset(p, v, n) -> llvm.memset(align 1 p, v, n) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } +Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) { + if (isa<ConstantPointerNull>(CI->getArgOperand(0))) + return emitMalloc(CI->getArgOperand(1), B, DL, TLI); + + return nullptr; +} + //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1666,12 +1682,12 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { - // abs(x) -> x >s -1 ? x : -x - Value *Op = CI->getArgOperand(0); - Value *Pos = - B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); - Value *Neg = B.CreateNeg(Op, "neg"); - return B.CreateSelect(Pos, Op, Neg); + // abs(x) -> x <s 0 ? -x : x + // The negation has 'nsw' because abs of INT_MIN is undefined. + Value *X = CI->getArgOperand(0); + Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType())); + Value *NegX = B.CreateNSWNeg(X, "neg"); + return B.CreateSelect(IsNeg, NegX, X); } Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { @@ -1695,6 +1711,29 @@ Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { ConstantInt::get(CI->getType(), 0x7F)); } +Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) { + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return nullptr; + + return convertStrToNumber(CI, Str, 10); +} + +Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) { + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return nullptr; + + if (!isa<ConstantPointerNull>(CI->getArgOperand(1))) + return nullptr; + + if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) { + return convertStrToNumber(CI, Str, CInt->getSExtValue()); + } + + return nullptr; +} + //===----------------------------------------------------------------------===// // Formatting and IO Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1826,15 +1865,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. - for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) - if (FormatStr[i] == '%') - return nullptr; // we found a format specifier, bail out. + if (FormatStr.find('%') != StringRef::npos) + return nullptr; // we found a format specifier, bail out. - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1), - 1); // Copy the null byte. 
+                       FormatStr.size() + 1)); // Copy the null byte.
     return ConstantInt::get(CI->getType(), FormatStr.size());
   }
 
@@ -1868,7 +1905,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
       return nullptr;
     Value *IncLen =
         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
-    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
 
     // The sprintf result is the unincremented number of bytes in the string.
     return B.CreateIntCast(Len, CI->getType(), false);
@@ -1897,6 +1934,93 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
+  // Check for a fixed format string.
+  StringRef FormatStr;
+  if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr))
+    return nullptr;
+
+  // Check for size.
+  ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!Size)
+    return nullptr;
+
+  uint64_t N = Size->getZExtValue();
+
+  // If we just have a format string (nothing else crazy) transform it.
+  if (CI->getNumArgOperands() == 3) {
+    // Make sure there's no % in the constant array. We could try to handle
+    // %% -> % in the future if we cared.
+    if (FormatStr.find('%') != StringRef::npos)
+      return nullptr; // we found a format specifier, bail out.
+
+    if (N == 0)
+      return ConstantInt::get(CI->getType(), FormatStr.size());
+    else if (N < FormatStr.size() + 1)
+      return nullptr;
+
+    // snprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt,
+    // strlen(fmt)+1)
+    B.CreateMemCpy(
+        CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
+        ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+                         FormatStr.size() + 1)); // Copy the null byte.
+    return ConstantInt::get(CI->getType(), FormatStr.size());
+  }
+
+  // The remaining optimizations require the format string to be "%s" or "%c"
+  // and have an extra operand.
+  if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
+      CI->getNumArgOperands() == 4) {
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      if (N == 0)
+        return ConstantInt::get(CI->getType(), 1);
+      else if (N == 1)
+        return nullptr;
+
+      // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+      if (!CI->getArgOperand(3)->getType()->isIntegerTy())
+        return nullptr;
+      Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
+      Value *Ptr = castToCStr(CI->getArgOperand(0), B);
+      B.CreateStore(V, Ptr);
+      Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+      B.CreateStore(B.getInt8(0), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // snprintf(dest, size, "%s", str) ->
+      //   llvm.memcpy(align 1 dest, align 1 str, len+1)
+      StringRef Str;
+      if (!getConstantStringInfo(CI->getArgOperand(3), Str))
+        return nullptr;
+
+      if (N == 0)
+        return ConstantInt::get(CI->getType(), Str.size());
+      else if (N < Str.size() + 1)
+        return nullptr;
+
+      B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
+                     ConstantInt::get(CI->getType(), Str.size() + 1));
+
+      // The snprintf result is the unincremented number of bytes in the string.
@@ -1913,9 +2037,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
 
   // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
   if (CI->getNumArgOperands() == 2) {
-    for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
-      if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
-        return nullptr;        // We found a format specifier.
+    // Could handle %% -> % if we cared.
+    if (FormatStr.find('%') != StringRef::npos)
+      return nullptr; // We found a format specifier.
 
     return emitFWrite(
         CI->getArgOperand(1),
@@ -1973,22 +2097,27 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
   // Get the element size and count.
   ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
   ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  if (!SizeC || !CountC)
-    return nullptr;
-  uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
-
-  // If this is writing zero records, remove the call (it's a noop).
-  if (Bytes == 0)
-    return ConstantInt::get(CI->getType(), 0);
-
-  // If this is writing one byte, turn it into fputc.
-  // This optimisation is only valid, if the return value is unused.
-  if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
-    Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
-    Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
-    return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+  if (SizeC && CountC) {
+    uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
+
+    // If this is writing zero records, remove the call (it's a noop).
+    if (Bytes == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // If this is writing one byte, turn it into fputc.
+    // This optimisation is only valid, if the return value is unused.
+    if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+      Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+      Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+    }
   }
 
+  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
+    return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+                              CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
+                              TLI);
+
   return nullptr;
 }
 
@@ -1997,12 +2126,18 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
 
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
-  if (CI->getParent()->getParent()->optForSize())
+  if (CI->getFunction()->optForSize())
     return nullptr;
 
-  // We can't optimize if return value is used.
-  if (!CI->use_empty())
-    return nullptr;
+  // Check if has any use
+  if (!CI->use_empty()) {
+    if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
+      return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
+                               TLI);
+    else
+      // We can't optimize if return value is used.
+      return nullptr;
+  }
 
   // fputs(s,F) --> fwrite(s,1,strlen(s),F)
   uint64_t Len = GetStringLength(CI->getArgOperand(0));
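The isLocallyOpenedFile folds above (fwrite and fputs here, plus the fputc/fgetc/fgets/fread cases that follow) swap locked stdio calls for their _unlocked variants when the FILE* comes from an fopen in the same function and never escapes it. A rough source-level picture of the rewrite, shown with POSIX putc_unlocked since availability of the other _unlocked entry points varies by libc; `write_mark` is a hypothetical example, not code from the patch:

    #include <stdio.h>

    // Hypothetical function illustrating the locally-opened-file condition.
    void write_mark(const char *path) {
      FILE *f = fopen(path, "w");
      if (!f)
        return;
      // fputc('.', f) -> putc_unlocked('.', f): `f` never escapes this
      // function, so no other thread can hold the stream's lock.
      putc_unlocked('.', f);
      fclose(f);
    }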
@@ -2016,6 +2151,40 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
                     CI->getArgOperand(1), B, DL, TLI);
 }
 
+Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) {
+  optimizeErrorReporting(CI, B, 1);
+
+  if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI))
+    return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B,
+                             TLI);
+
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) {
+  if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI))
+    return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI);
+
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) {
+  if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI))
+    return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+                             CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
+  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI))
+    return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1),
+                             CI->getArgOperand(2), CI->getArgOperand(3), B, DL,
+                             TLI);
+
+  return nullptr;
+}
+
 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
   // Check for a constant string.
   StringRef Str;
@@ -2099,6 +2268,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
     return optimizeMemMove(CI, Builder);
   case LibFunc_memset:
     return optimizeMemSet(CI, Builder);
+  case LibFunc_realloc:
+    return optimizeRealloc(CI, Builder);
   case LibFunc_wcslen:
     return optimizeWcslen(CI, Builder);
   default:
@@ -2290,16 +2461,33 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
     return optimizeIsAscii(CI, Builder);
   case LibFunc_toascii:
     return optimizeToAscii(CI, Builder);
+  case LibFunc_atoi:
+  case LibFunc_atol:
+  case LibFunc_atoll:
+    return optimizeAtoi(CI, Builder);
+  case LibFunc_strtol:
+  case LibFunc_strtoll:
+    return optimizeStrtol(CI, Builder);
   case LibFunc_printf:
     return optimizePrintF(CI, Builder);
   case LibFunc_sprintf:
     return optimizeSPrintF(CI, Builder);
+  case LibFunc_snprintf:
+    return optimizeSnPrintF(CI, Builder);
   case LibFunc_fprintf:
     return optimizeFPrintF(CI, Builder);
   case LibFunc_fwrite:
     return optimizeFWrite(CI, Builder);
+  case LibFunc_fread:
+    return optimizeFRead(CI, Builder);
   case LibFunc_fputs:
     return optimizeFPuts(CI, Builder);
+  case LibFunc_fgets:
+    return optimizeFGets(CI, Builder);
+  case LibFunc_fputc:
+    return optimizeFPutc(CI, Builder);
+  case LibFunc_fgetc:
+    return optimizeFGetc(CI, Builder);
   case LibFunc_puts:
     return optimizePuts(CI, Builder);
   case LibFunc_perror:
@@ -2307,8 +2495,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
   case LibFunc_vfprintf:
   case LibFunc_fiprintf:
     return optimizeErrorReporting(CI, Builder, 0);
-  case LibFunc_fputc:
-    return optimizeErrorReporting(CI, Builder, 1);
   default:
     return nullptr;
   }
@@ -2393,8 +2579,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                      IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
-    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                   CI->getArgOperand(2), 1);
+    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+                   CI->getArgOperand(2));
    return CI->getArgOperand(0);
   }
   return nullptr;
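The FortifiedLibCallSimplifier change above only alters how the memcpy intrinsic's alignment is encoded (separate destination/source alignment arguments); the fold itself is unchanged. For context, a C-level sketch of what optimizeMemCpyChk does once isFortifiedCallFoldable proves the copy length fits the recorded object size; the type and function names are hypothetical:

    #include <cstring>

    // Hypothetical example type.
    struct Packet {
      char hdr[16];
    };

    void set_header(Packet *p, const char *src) {
      // A fortified call such as __memcpy_chk(p->hdr, src, 8, 16) carries
      // the destination object size (16). Because 8 <= 16 is provable, the
      // runtime check can never fire and the call folds to:
      std::memcpy(p->hdr, src, 8);
    }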
@@ -2403,8 +2589,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                       IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
-    B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                    CI->getArgOperand(2), 1);
+    B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
+                    CI->getArgOperand(2));
     return CI->getArgOperand(0);
   }
   return nullptr;
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
index 968eb0208f43..f8d758c54983 100644
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -101,7 +101,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
   // At this point module should have the proper mix of globals and locals.
   // As we attempt to partition this module, we must not change any
   // locals to globals.
-  DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
+  LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
+                    << ")functions\n");
   ClusterMapType GVtoClusterMap;
   ComdatMembersType ComdatMembers;
 
@@ -180,28 +181,31 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
         std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
                                      GVtoClusterMap.member_end()), I));
 
-  std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
-    if (a.first == b.first)
-      return a.second->getData()->getName() > b.second->getData()->getName();
-    else
-      return a.first > b.first;
-  });
+  llvm::sort(Sets.begin(), Sets.end(),
+             [](const SortType &a, const SortType &b) {
+               if (a.first == b.first)
+                 return a.second->getData()->getName() >
+                        b.second->getData()->getName();
+               else
+                 return a.first > b.first;
+             });
 
   for (auto &I : Sets) {
     unsigned CurrentClusterID = BalancinQueue.top().first;
     unsigned CurrentClusterSize = BalancinQueue.top().second;
     BalancinQueue.pop();
 
-    DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
-          << ") ----> " << I.second->getData()->getName() << "\n");
+    LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
+                      << I.first << ") ----> " << I.second->getData()->getName()
+                      << "\n");
 
     for (ClusterMapType::member_iterator MI =
              GVtoClusterMap.findLeader(I.second);
          MI != GVtoClusterMap.member_end(); ++MI) {
       if (!Visited.insert(*MI).second)
         continue;
-      DEBUG(dbgs() << "----> " << (*MI)->getName()
-                   << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
+      LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
+                        << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
       Visited.insert(*MI);
       ClusterIDMap[*MI] = CurrentClusterID;
       CurrentClusterSize++;
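The std::sort call above becomes llvm::sort: a drop-in wrapper that, when LLVM's expensive checks are enabled, shuffles the range before sorting to expose comparators that are not strict weak orderings or that let output depend on input order. A minimal sketch of the same pattern with a fully deterministic comparator; `sortBySizeDescending` is a hypothetical helper, assuming llvm/ADT/STLExtras.h:

    #include "llvm/ADT/STLExtras.h"

    #include <utility>
    #include <vector>

    // Hypothetical helper mirroring the comparator in the hunk above.
    void sortBySizeDescending(std::vector<std::pair<unsigned, unsigned>> &Sets) {
      llvm::sort(Sets.begin(), Sets.end(),
                 [](const std::pair<unsigned, unsigned> &a,
                    const std::pair<unsigned, unsigned> &b) {
                   if (a.first == b.first)
                     return a.second > b.second; // deterministic tie-break
                   return a.first > b.first;     // larger clusters first
                 });
    }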
" l " : " e ") << "\n"); Visited.insert(*MI); ClusterIDMap[*MI] = CurrentClusterID; CurrentClusterSize++; @@ -270,7 +274,7 @@ void llvm::SplitModule( for (unsigned I = 0; I < N; ++I) { ValueToValueMapTy VMap; std::unique_ptr<Module> MPart( - CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { + CloneModule(*M, VMap, [&](const GlobalValue *GV) { if (ClusterIDMap.count(GV)) return (ClusterIDMap[GV] == I); else diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index 49dc15cf5e7c..ac0b519f4a77 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -75,6 +74,3 @@ bool StripGCRelocates::runOnFunction(Function &F) { INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates", "Strip gc.relocates inserted through RewriteStatepointsForGC", true, false) -FunctionPass *llvm::createStripGCRelocatesPass() { - return new StripGCRelocates(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index cd0378e0140c..8956a089a99c 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -9,7 +9,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/Pass.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index ed444e4cf43c..e633ac0c874d 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -19,7 +19,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; char UnifyFunctionExitNodes::ID = 0; diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index f6c7d1c4989e..afd842f59911 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -12,7 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils.h" #include "llvm-c/Initialization.h" +#include "llvm-c/Transforms/Utils.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" @@ -33,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializePromoteLegacyPassPass(Registry); initializeStripNonLineTableDebugInfoPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); - initializeInstSimplifierPass(Registry); initializeMetaRenamerPass(Registry); initializeStripGCRelocatesPass(Registry); initializePredicateInfoPrinterLegacyPassPass(Registry); @@ -43,3 +45,12 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) { initializeTransformUtils(*unwrap(R)); } + +void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerSwitchPass()); +} + +void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { + 
diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
index c3feea6a0a41..948d9bd5baad 100644
--- a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,8 +20,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
     return false;
 
+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
+
+  // The store size must be byte-aligned to support future type casts.
+  if (llvm::alignTo(StoreSize, 8) != StoreSize)
+    return false;
+
   // The store has to be at least as big as the load.
-  if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+  if (StoreSize < DL.getTypeSizeInBits(LoadTy))
     return false;
 
   // Don't coerce non-integral pointers to integers or vice versa.
@@ -389,8 +395,8 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
   NewLoad->takeName(SrcVal);
   NewLoad->setAlignment(SrcVal->getAlignment());
 
-  DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
-  DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+  LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+  LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
 
   // Replace uses of the original load with the wider load.  On a big endian
   // system, we need to shift down to get the relevant bits.
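The new guard in canCoerceMustAliasedValueToLoad rejects stores whose bit width is not a whole number of bytes (for example i1 or i17), since the later coercion code reasons in byte units. A standalone equivalent of the `llvm::alignTo(StoreSize, 8) != StoreSize` test, illustrative only:

    #include <cassert>
    #include <cstdint>

    // Standalone equivalent of the alignTo-based byte-size check.
    static bool isByteSized(uint64_t Bits) {
      return Bits % 8 == 0; // alignTo(Bits, 8) == Bits iff 8 divides Bits
    }

    int main() {
      assert(isByteSized(32));  // i32: still coercible
      assert(!isByteSized(17)); // i17: now rejected
      return 0;
    }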