diff options
Diffstat (limited to 'lib/Transforms/IPO/PassManagerBuilder.cpp')
-rw-r--r-- | lib/Transforms/IPO/PassManagerBuilder.cpp | 206 |
1 files changed, 129 insertions, 77 deletions
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9764944dc332..3ea77f08fd3c 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1,9 +1,8 @@ //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,6 +30,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" @@ -39,9 +39,13 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" using namespace llvm; @@ -50,14 +54,6 @@ static cl::opt<bool> cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); static cl::opt<bool> - RunLoopVectorization("vectorize-loops", cl::Hidden, - cl::desc("Run the Loop vectorization passes")); - -static cl::opt<bool> -RunSLPVectorization("vectorize-slp", cl::Hidden, - cl::desc("Run the SLP vectorization passes")); - -static cl::opt<bool> UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); @@ -73,12 +69,6 @@ RunLoopRerolling("reroll-loops", cl::Hidden, static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); -static cl::opt<bool> -RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", - cl::init(true), cl::Hidden, - cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " - "vectorizer instead of before")); - // Experimental option to use CFL-AA enum class CFLAAType { None, Steensgaard, Andersen, Both }; static cl::opt<CFLAAType> @@ -104,23 +94,13 @@ static cl::opt<bool> EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); +static cl::opt<bool> + EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable performing ThinLTO.")); + cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, cl::desc("Enable hot-cold splitting pass")); - -static cl::opt<bool> RunPGOInstrGen( - "profile-generate", cl::init(false), cl::Hidden, - cl::desc("Enable PGO instrumentation.")); - -static cl::opt<std::string> - PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden, - cl::desc("Specify the path of profile data file.")); - -static cl::opt<std::string> RunPGOInstrUse( - "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"), - cl::desc("Enable use phase of PGO instrumentation and specify the path " - "of profile data file")); - static cl::opt<bool> UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); @@ -134,10 +114,6 @@ static cl::opt<int> PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); -static cl::opt<bool> EnableEarlyCSEMemSSA( - "enable-earlycse-memssa", cl::init(true), cl::Hidden, - cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)")); - static cl::opt<bool> EnableGVNHoist( "enable-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass (default = off)")); @@ -156,10 +132,21 @@ static cl::opt<bool> EnableGVNSink( "enable-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN sinking pass (default = off)")); +// This option is used in simplifying testing SampleFDO optimizations for +// profile loading. static cl::opt<bool> EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); +cl::opt<bool> FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. ")); + +cl::opt<bool> EnableOrderFileInstrumentation( + "enable-order-file-instrumentation", cl::init(false), cl::Hidden, + cl::desc("Enable order file instrumentation (default = off)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -167,19 +154,26 @@ PassManagerBuilder::PassManagerBuilder() { Inliner = nullptr; DisableUnrollLoops = false; SLPVectorize = RunSLPVectorization; - LoopVectorize = RunLoopVectorization; + LoopVectorize = EnableLoopVectorization; + LoopsInterleaved = EnableLoopInterleaving; RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; DisableGVNLoadPRE = false; + ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; VerifyInput = false; VerifyOutput = false; MergeFunctions = false; PrepareForLTO = false; - EnablePGOInstrGen = RunPGOInstrGen; - PGOInstrGen = PGOOutputFile; - PGOInstrUse = RunPGOInstrUse; + EnablePGOInstrGen = false; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; + PGOInstrGen = ""; + PGOInstrUse = ""; + PGOSampleUse = ""; PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = false; + PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; } @@ -272,13 +266,19 @@ void PassManagerBuilder::populateFunctionPassManager( } // Do PGO instrumentation generation or use pass as the option specified. -void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; + // Perform the preinline and cleanup passes for O1 and above. // And avoid doing them if optimizing for size. + // We will not do this inline for context sensitive PGO (when IsCS is true). if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && - PGOSampleUse.empty()) { + PGOSampleUse.empty() && !IsCS) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. The only fields of InlineParams we @@ -296,22 +296,23 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } - if (EnablePGOInstrGen) { - MPM.add(createPGOInstrumentationGenLegacyPass()); + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); // Add the profile lowering pass. InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; MPM.add(createLoopRotatePass()); - MPM.add(createInstrProfilingLegacyPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); } if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. We don't run this at -O0. - if (OptLevel > 0) + if (OptLevel > 0 && !IsCS) MPM.add( createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } @@ -320,7 +321,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { @@ -359,7 +360,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else @@ -378,8 +379,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (EnableLoopInterchange) MPM.add(createLoopInterchangePass()); // Interchange loops - MPM.add(createSimpleLoopUnrollPass(OptLevel, - DisableUnrollLoops)); // Unroll small loops + // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); addExtensionsToPM(EP_LoopOptimizerEnd, MPM); // This ends the loop pass pipelines. @@ -403,14 +405,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); addExtensionsToPM(EP_ScalarOptimizerLate, MPM); if (RerollLoops) MPM.add(createLoopRerollPass()); - if (!RunSLPAfterLoopVectorization && SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs @@ -419,15 +419,23 @@ void PassManagerBuilder::addFunctionSimplificationPasses( addExtensionsToPM(EP_Peephole, MPM); if (EnableCHR && OptLevel >= 3 && - (!PGOInstrUse.empty() || !PGOSampleUse.empty())) + (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) MPM.add(createControlHeightReductionLegacyPass()); } void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link + // is handled separately, so just check this is not the ThinLTO post-link. + bool DefaultOrPreLinkPipeline = !PerformThinLTO; + if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && PerformThinLTO)) + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); } // Allow forcing function attributes as a debugging and tuning aid. @@ -508,6 +516,10 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); @@ -523,9 +535,14 @@ void PassManagerBuilder::populateModulePassManager( // profile annotation in backend more difficult. // PGO instrumentation is added during the compile phase for ThinLTO, do // not run it a second time - if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) + if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); + // Create profile COMDAT variables. Lld linker wants to see all variables + // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. + if (!PerformThinLTO && EnablePGOCSInstrGen) + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. @@ -567,6 +584,17 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); + // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass + // for LTO and ThinLTO -- The actual pass will be called after all inlines + // are performed. + // Need to do this after COMDAT variables have been eliminated, + // (i.e. after EliminateAvailableExternallyPass). + if (!(PrepareForLTO || PrepareForThinLTO)) + addPGOInstrPasses(MPM, /* IsCS */ true); + + if (EnableOrderFileInstrumentation) + MPM.add(createInstrOrderFilePass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, @@ -605,7 +633,7 @@ void PassManagerBuilder::populateModulePassManager( // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -640,7 +668,7 @@ void PassManagerBuilder::populateModulePassManager( // llvm.loop.distribute=true or when -enable-loop-distribute is specified. MPM.add(createLoopDistributePass()); - MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize)); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. @@ -662,7 +690,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); addInstructionCombiningPass(MPM); - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); @@ -675,7 +703,7 @@ void PassManagerBuilder::populateModulePassManager( // before SLP vectorization. MPM.add(createCFGSimplificationPass(1, true, true, false, true)); - if (RunSLPAfterLoopVectorization && SLPVectorize) { + if (SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. if (OptLevel > 1 && ExtraVectorizerPasses) { MPM.add(createEarlyCSEPass()); @@ -692,8 +720,9 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopUnrollAndJamPass(OptLevel)); } - MPM.add(createLoopUnrollPass(OptLevel, - DisableUnrollLoops)); // Unroll small loops + // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. @@ -703,7 +732,7 @@ void PassManagerBuilder::populateModulePassManager( // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); } MPM.add(createWarnMissedTransformationsPass()); @@ -722,6 +751,11 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createConstantMergePass()); // Merge dup global constants } + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + MPM.add(createHotColdSplittingPass()); + if (MergeFunctions) MPM.add(createMergeFunctionsPass()); @@ -738,9 +772,6 @@ void PassManagerBuilder::populateModulePassManager( // flattening of blocks. MPM.add(createDivRemPairsPass()); - if (EnableHotColdSplit) - MPM.add(createHotColdSplittingPass()); - // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. MPM.add(createCFGSimplificationPass()); @@ -793,6 +824,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Attach metadata to indirect call sites indicating the set of functions // they may target at run-time. This should follow IPSCCP. PM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + PM.add(createAttributorLegacyPass()); } // Infer attributes about definitions. The readnone attribute in particular is @@ -842,6 +876,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createPruneEHPass()); // Remove dead EH info. + // CSFDO instrumentation and use pass. + addPGOInstrPasses(PM, /* IsCS */ true); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); @@ -859,11 +896,16 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Break up allocas PM.add(createSROAPass()); - // Run a few AA driven optimizations here and now, to cleanup the code. + // LTO provides additional opportunities for tailcall elimination due to + // link-time inlining, and visibility of nocapture attribute. + PM.add(createTailCallEliminationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. + // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. @@ -878,11 +920,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); - PM.add(createSimpleLoopUnrollPass(OptLevel, - DisableUnrollLoops)); // Unroll small loops + // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); PM.add(createLoopVectorizePass(true, !LoopVectorize)); // The vectorizer may have significantly shortened a loop body; unroll again. - PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops)); + PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); PM.add(createWarnMissedTransformationsPass()); @@ -896,9 +940,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information - if (RunSLPAfterLoopVectorization) - if (SLPVectorize) - PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. // After vectorization, assume intrinsics may tell us more about pointer // alignments. @@ -913,6 +956,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { void PassManagerBuilder::addLateLTOOptimizationPasses( legacy::PassManagerBase &PM) { + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildLTODefaultPipeline). + if (EnableHotColdSplit) + PM.add(createHotColdSplittingPass()); + // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); @@ -968,6 +1016,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (VerifyInput) PM.add(createVerifierPass()); + addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); + if (OptLevel != 0) addLTOOptimizationPasses(PM); else { @@ -989,6 +1039,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel != 0) addLateLTOOptimizationPasses(PM); + addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); + if (VerifyOutput) PM.add(createVerifierPass()); } |