diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-08-16 21:02:59 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-08-16 21:02:59 +0000 |
commit | 3ca95b020283db6244cab92ede73c969253b6a31 (patch) | |
tree | d16e791e58694facd8f68d3e2797a1eaa8018afc /contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | |
parent | 27067774dce3388702a4cf744d7096c6fb71b688 (diff) | |
parent | c3aee98e721333f265a88d6bf348e6e468f027d4 (diff) |
Update llvm to release_39 branch r276489, and resolve conflicts.
Notes:
svn path=/projects/clang390-import/; revision=304240
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 430 |
1 file changed, 296 insertions, 134 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index faada9c2a7db..cf5b76dc365b 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -16,23 +16,27 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -58,10 +62,6 @@ static cl::opt<bool> ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); -static cl::opt<bool> UseNewSROA("use-new-sroa", - cl::init(true), cl::Hidden, - cl::desc("Enable the new, experimental SROA pass")); - static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); @@ -80,9 +80,19 @@ RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", cl::desc("Run the SLP 
vectorizer (and BB vectorizer) after the Loop " "vectorizer instead of before")); -static cl::opt<bool> UseCFLAA("use-cfl-aa", - cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis")); +// Experimental option to use CFL-AA +enum class CFLAAType { None, Steensgaard, Andersen, Both }; +static cl::opt<CFLAAType> + UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-aa"), + clEnumValEnd)); static cl::opt<bool> EnableMLSM("mlsm", cl::init(true), cl::Hidden, @@ -92,25 +102,44 @@ static cl::opt<bool> EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopInterchange Pass")); -static cl::opt<bool> EnableLoopDistribute( - "enable-loop-distribute", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopDistribution Pass")); - static cl::opt<bool> EnableNonLTOGlobalsModRef( "enable-non-lto-gmr", cl::init(true), cl::Hidden, cl::desc( "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); static cl::opt<bool> EnableLoopLoadElim( - "enable-loop-load-elim", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + "enable-loop-load-elim", cl::init(true), cl::Hidden, + cl::desc("Enable the LoopLoadElimination Pass")); + +static cl::opt<std::string> RunPGOInstrGen( + "profile-generate", cl::init(""), cl::Hidden, + cl::desc("Enable generation phase of PGO instrumentation and specify the " + "path of profile data file")); + +static cl::opt<std::string> RunPGOInstrUse( + "profile-use", cl::init(""), cl::Hidden, 
cl::value_desc("filename"), + cl::desc("Enable use phase of PGO instrumentation and specify the path " + "of profile data file")); + +static cl::opt<bool> UseLoopVersioningLICM( + "enable-loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM pass")); + +static cl::opt<bool> + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt<int> PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; - FunctionIndex = nullptr; + ModuleSummary = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -123,6 +152,10 @@ PassManagerBuilder::PassManagerBuilder() { VerifyOutput = false; MergeFunctions = false; PrepareForLTO = false; + PGOInstrGen = RunPGOInstrGen; + PGOInstrUse = RunPGOInstrUse; + PrepareForThinLTO = false; + PerformThinLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -137,11 +170,11 @@ static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, Fn)); + GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { - Extensions.push_back(std::make_pair(Ty, Fn)); + Extensions.push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, @@ -156,15 +189,34 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, void PassManagerBuilder::addInitialAliasAnalysisPasses( legacy::PassManagerBase &PM) const { + 
switch (UseCFLAA) { + case CFLAAType::Steensgaard: + PM.add(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + PM.add(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + PM.add(createCFLSteensAAWrapperPass()); + PM.add(createCFLAndersAAWrapperPass()); + break; + default: + break; + } + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - if (UseCFLAA) - PM.add(createCFLAAWrapperPass()); PM.add(createTypeBasedAAWrapperPass()); PM.add(createScopedNoAliasAAWrapperPass()); } +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -178,94 +230,50 @@ void PassManagerBuilder::populateFunctionPassManager( addInitialAliasAnalysisPasses(FPM); FPM.add(createCFGSimplificationPass()); - if (UseNewSROA) - FPM.add(createSROAPass()); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + FPM.add(createGVNHoistPass()); FPM.add(createLowerExpectIntrinsicPass()); } -void PassManagerBuilder::populateModulePassManager( - legacy::PassManagerBase &MPM) { - // Allow forcing function attributes as a debugging and tuning aid. - MPM.add(createForceFunctionAttrsLegacyPass()); - - // If all optimizations are disabled, just run the always-inline pass and, - // if enabled, the function merging pass. - if (OptLevel == 0) { - if (Inliner) { - MPM.add(Inliner); - Inliner = nullptr; - } - - // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly - // creates a CGSCC pass manager, but we don't want to add extensions into - // that pass manager. 
To prevent this we insert a no-op module pass to reset - // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 - // builds. The function merging pass is - if (MergeFunctions) - MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) - MPM.add(createBarrierNoopPass()); - - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); +// Do PGO instrumentation generation or use pass as the option specified. +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { + if (PGOInstrGen.empty() && PGOInstrUse.empty()) return; - } - - // Add LibraryInfo if we have some. - if (LibraryInfo) - MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - addInitialAliasAnalysisPasses(MPM); - - if (!DisableUnitAtATime) { - // Infer attributes about declarations if possible. - MPM.add(createInferFunctionAttrsLegacyPass()); - - addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); - - MPM.add(createIPSCCPPass()); // IP SCCP - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - // Promote any localized global vars - MPM.add(createPromoteMemoryToRegisterPass()); - - MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - - MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) { + // Create preinline pass. + MPM.add(createFunctionInliningPass(PreInlineThreshold)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } - - if (EnableNonLTOGlobalsModRef) - // We add a module alias analysis pass here. 
In part due to bugs in the - // analysis infrastructure this "works" in that the analysis stays alive - // for the entire SCC pass run below. - MPM.add(createGlobalsAAWrapperPass()); - - // Start of CallGraph SCC passes. - if (!DisableUnitAtATime) - MPM.add(createPruneEHPass()); // Remove dead EH info - if (Inliner) { - MPM.add(Inliner); - Inliner = nullptr; + if (!PGOInstrGen.empty()) { + MPM.add(createPGOInstrumentationGenLegacyPass()); + // Add the profile lowering pass. + InstrProfOptions Options; + Options.InstrProfileOutput = PGOInstrGen; + MPM.add(createInstrProfilingLegacyPass(Options)); } - if (!DisableUnitAtATime) - MPM.add(createPostOrderFunctionAttrsPass()); - if (OptLevel > 2) - MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - + if (!PGOInstrUse.empty()) + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); +} +void PassManagerBuilder::addFunctionSimplificationPasses( + legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. - if (UseNewSROA) - MPM.add(createSROAPass()); - else - MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. 
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's + // Combine silly seq's + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls @@ -276,7 +284,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops @@ -303,7 +311,7 @@ void PassManagerBuilder::populateModulePassManager( // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); @@ -320,7 +328,7 @@ void PassManagerBuilder::populateModulePassManager( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -338,18 +346,99 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Clean up after everything. + // Clean up after everything. 
+ addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); +} + +void PassManagerBuilder::populateModulePassManager( + legacy::PassManagerBase &MPM) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + + // If all optimizations are disabled, just run the always-inline pass and, + // if enabled, the function merging pass. + if (OptLevel == 0) { + addPGOInstrPasses(MPM); + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + + // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly + // creates a CGSCC pass manager, but we don't want to add extensions into + // that pass manager. To prevent this we insert a no-op module pass to reset + // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 + // builds. The function merging pass is + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + else if (!GlobalExtensions->empty() || !Extensions.empty()) + MPM.add(createBarrierNoopPass()); + + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. 
+ MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + if (!PerformThinLTO) { + /// PGO instrumentation is added during the compile phase for ThinLTO, do + /// not run it a second time + addPGOInstrPasses(MPM); + } + + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionLegacyPass()); + + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + if (!DisableUnitAtATime) + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + addFunctionSimplificationPasses(MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); - if (!DisableUnitAtATime) - MPM.add(createReversePostOrderFunctionAttrsPass()); - - if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO && + !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. 
Note if they @@ -360,6 +449,34 @@ void PassManagerBuilder::populateModulePassManager( // globals referenced by available external functions dead // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); + + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, let's not bloat the code with + // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes + // during ThinLTO and perform the rest of the optimizations afterward. + if (PrepareForThinLTO) { + // Reduce the size of the IR as much as possible. + MPM.add(createGlobalOptimizerPass()); + // Rename anon function to be able to export them in the summary. + MPM.add(createNameAnonFunctionPass()); + return; + } + + if (PerformThinLTO) + // Optimize globals now when performing ThinLTO, this enables more + // optimizations later. + MPM.add(createGlobalOptimizerPass()); + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop. + if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants } if (EnableNonLTOGlobalsModRef) @@ -391,9 +508,10 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); // Distribute loops to allow partial vectorization. I.e. isolate dependences - // into separate loop that would otherwise inhibit vectorization. - if (EnableLoopDistribute) - MPM.add(createLoopDistributePass()); + // into separate loop that would otherwise inhibit vectorization. 
This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false)); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); @@ -407,7 +525,7 @@ void PassManagerBuilder::populateModulePassManager( // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. We want to track correllated @@ -417,11 +535,11 @@ void PassManagerBuilder::populateModulePassManager( // dead (or speculatable) control flows or more combining opportunities. MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createLICMPass()); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); } if (RunSLPAfterLoopVectorization) { @@ -434,7 +552,7 @@ void PassManagerBuilder::populateModulePassManager( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -449,19 +567,22 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { 
MPM.add(createLoopUnrollPass()); // Unroll small loops // LoopUnroll may generate some redundency to cleanup. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); + + // Get rid of LCSSA nodes. + MPM.add(createInstructionSimplifierPass()); } // After vectorization and unrolling, assume intrinsics may tell us more @@ -487,11 +608,15 @@ void PassManagerBuilder::populateModulePassManager( } void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Remove unused virtual tables to improve the quality of code generated by + // whole-program devirtualization and bitset lowering. + PM.add(createGlobalDCEPass()); + // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); - if (FunctionIndex) - PM.add(createFunctionImportPass(FunctionIndex)); + if (ModuleSummary) + PM.add(createFunctionImportPass(ModuleSummary)); // Allow forcing function attributes as a debugging and tuning aid. PM.add(createForceFunctionAttrsLegacyPass()); @@ -499,14 +624,32 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); - // Propagate constants at call sites into the functions they call. This - // opens opportunities for globalopt (and inlining) by substituting function - // pointers passed as arguments to direct uses of functions. - PM.add(createIPSCCPPass()); + if (OptLevel > 1) { + // Indirect call promotion. This should promote all the targets that are + // left by the earlier promotion pass that promotes intra-module targets. + // This two-step promotion is to save the compile time. 
For LTO, it should + // produce the same result as if we only do promotion here. + PM.add(createPGOIndirectCallPromotionLegacyPass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + } - // Now that we internalized some globals, see if we can hack on them! - PM.add(createPostOrderFunctionAttrsPass()); + // Infer attributes about definitions. The readnone attribute in particular is + // required for virtual constant propagation. + PM.add(createPostOrderFunctionAttrsLegacyPass()); PM.add(createReversePostOrderFunctionAttrsPass()); + + // Apply whole-program devirtualization and virtual constant propagation. + PM.add(createWholeProgramDevirtPass()); + + // That's all we need at opt level 1. + if (OptLevel == 1) + return; + + // Now that we internalized some globals, see if we can hack on them! PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -522,7 +665,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); // Inline small functions @@ -544,18 +687,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. 
- PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); // Break up allocas - if (UseNewSROA) - PM.add(createSROAPass()); - else - PM.add(createScalarReplAggregatesPass()); + PM.add(createSROAPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. @@ -573,15 +713,20 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); + if (!DisableUnrollLoops) + PM.add(createSimpleLoopUnrollPass()); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); + // The vectorizer may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + PM.add(createLoopUnrollPass()); // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - PM.add(createInstructionCombiningPass()); // Initial cleanup + addInstructionCombiningPass(PM); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants - PM.add(createInstructionCombiningPass()); // Clean up again + addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information @@ -597,7 +742,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoadCombinePass()); // Cleanup and simplify the code after the scalar optimizations. 
- PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -620,6 +765,23 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( PM.add(createMergeFunctionsPass()); } +void PassManagerBuilder::populateThinLTOPassManager( + legacy::PassManagerBase &PM) { + PerformThinLTO = true; + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (ModuleSummary) + PM.add(createFunctionImportPass(ModuleSummary)); + + populateModulePassManager(PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); + PerformThinLTO = false; +} + void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (LibraryInfo) PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); @@ -627,17 +789,17 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (VerifyInput) PM.add(createVerifierPass()); - if (OptLevel > 1) + if (OptLevel != 0) addLTOOptimizationPasses(PM); // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. PM.add(createCrossDSOCFIPass()); - // Lower bit sets to globals. This pass supports Clang's control flow - // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI - // is enabled. The pass does nothing if CFI is disabled. - PM.add(createLowerBitSetsPass()); + // Lower type metadata and the type.test intrinsic. This pass supports Clang's + // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at + // link time if CFI is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerTypeTestsPass()); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); |