aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp253
1 files changed, 94 insertions, 159 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index bbe5f8761d5f..79535ca49780 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -21,6 +20,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -28,9 +28,9 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -47,40 +47,19 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-static cl::opt<int>
-InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
- cl::desc("Control the amount of inlining to perform (default = 225)"));
-
-static cl::opt<int>
-HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
- cl::desc("Threshold for inlining functions with inline hint"));
-
-// We instroduce this threshold to help performance of instrumentation based
-// PGO before we actually hook up inliner with analysis passes such as BPI and
-// BFI.
-static cl::opt<int>
-ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
- cl::desc("Threshold for inlining functions with cold attribute"));
-
-// Threshold to use when optsize is specified (and there is no -inline-limit).
-const int OptSizeThreshold = 75;
+Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
-Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
-}
-
-Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
- : CallGraphSCCPass(ID),
- InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
- : Threshold),
- InsertLifetime(InsertLifetime) {}
+Inliner::Inliner(char &ID, bool InsertLifetime)
+ : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -243,67 +222,6 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
return true;
}
-unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int Threshold = InlineThreshold; // -inline-threshold or else selected by
- // overall opt level
-
- // If -inline-threshold is not given, listen to the optsize attribute when it
- // would decrease the threshold.
- Function *Caller = CS.getCaller();
- bool OptSize = Caller && !Caller->isDeclaration() &&
- // FIXME: Use Function::optForSize().
- Caller->hasFnAttribute(Attribute::OptimizeForSize);
- if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
- OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
-
- Function *Callee = CS.getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- return Threshold;
-
- // If profile information is available, use that to adjust threshold of hot
- // and cold functions.
- // FIXME: The heuristic used below for determining hotness and coldness are
- // based on preliminary SPEC tuning and may not be optimal. Replace this with
- // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
- uint64_t FunctionCount = 0, MaxFunctionCount = 0;
- bool HasPGOCounts = false;
- if (Callee->getEntryCount() &&
- Callee->getParent()->getMaximumFunctionCount()) {
- HasPGOCounts = true;
- FunctionCount = Callee->getEntryCount().getValue();
- MaxFunctionCount =
- Callee->getParent()->getMaximumFunctionCount().getValue();
- }
-
- // Listen to the inlinehint attribute or profile based hotness information
- // when it would increase the threshold and the caller does not need to
- // minimize its size.
- bool InlineHint =
- Callee->hasFnAttribute(Attribute::InlineHint) ||
- (HasPGOCounts &&
- FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
- if (InlineHint && HintThreshold > Threshold &&
- !Caller->hasFnAttribute(Attribute::MinSize))
- Threshold = HintThreshold;
-
- // Listen to the cold attribute or profile based coldness information
- // when it would decrease the threshold.
- bool ColdCallee =
- Callee->hasFnAttribute(Attribute::Cold) ||
- (HasPGOCounts &&
- FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
- // Command line argument for InlineLimit will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((InlineLimit.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
- ColdThreshold < Threshold)
- Threshold = ColdThreshold;
-
- return Threshold;
-}
-
static void emitAnalysis(CallSite CS, const Twine &Msg) {
Function *Caller = CS.getCaller();
LLVMContext &Ctx = Caller->getContext();
@@ -311,6 +229,76 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
+bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
+ int &TotalSecondaryCost) {
+
+ // For now we only handle local or inline functions.
+ if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
+ return false;
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ TotalSecondaryCost = 0;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (User *U : Caller->users()) {
+ CallSite CS2(U);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+ if (IC2.isAlways())
+ continue;
+
+ // See if inlining or original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += IC2.getCost();
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && !Caller->use_empty())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
+ return true;
+
+ return false;
+}
+
/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -342,77 +330,17 @@ bool Inliner::shouldInline(CallSite CS) {
Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
-
- // Try to detect the case where the current inlining candidate caller (call
- // it B) is a static or linkonce-ODR function and is an inlining candidate
- // elsewhere, and the current candidate callee (call it C) is large enough
- // that inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B into
- // its callers.
- //
- // This only applies to static and linkonce-ODR functions because those are
- // expected to be available for inlining in the translation units where they
- // are used. Thus we will always have the opportunity to make local inlining
- // decisions. Importantly the linkonce-ODR linkage covers inline functions
- // and templates in C++.
- //
- // FIXME: All of this logic should be sunk into getInlineCost. It relies on
- // the internal implementation of the inline cost metrics rather than
- // treating them as truly abstract units etc.
- if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
- int TotalSecondaryCost = 0;
- // The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
- // This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = Caller->hasLocalLinkage();
- // This bool tracks what happens if we DO inline C into B.
- bool inliningPreventsSomeOuterInline = false;
- for (User *U : Caller->users()) {
- CallSite CS2(U);
-
- // If this isn't a call to Caller (it could be some other sort
- // of reference) skip it. Such references will prevent the caller
- // from being removed.
- if (!CS2 || CS2.getCalledFunction() != Caller) {
- callerWillBeRemoved = false;
- continue;
- }
- InlineCost IC2 = getInlineCost(CS2);
- ++NumCallerCallersAnalyzed;
- if (!IC2) {
- callerWillBeRemoved = false;
- continue;
- }
- if (IC2.isAlways())
- continue;
-
- // See if inlining or original callsite would erase the cost delta of
- // this callsite. We subtract off the penalty for the call instruction,
- // which we would be deleting.
- if (IC2.getCostDelta() <= CandidateCost) {
- inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += IC2.getCost();
- }
- }
- // If all outer calls to Caller would get inlined, the cost for the last
- // one is set very low by getInlineCost, in anticipation that Caller will
- // be removed entirely. We did not account for this above unless there
- // is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
- TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
-
- if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << IC.getCost() <<
- ", outer Cost = " << TotalSecondaryCost << '\n');
- emitAnalysis(
- CS, Twine("Not inlining. Cost of inlining " +
- CS.getCalledFunction()->getName() +
- " increases the cost of inlining " +
- CS.getCaller()->getName() + " in other contexts"));
- return false;
- }
+ int TotalSecondaryCost = 0;
+ if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost)) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
+ << " Cost = " << IC.getCost()
+ << ", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
+ return false;
}
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
@@ -440,8 +368,15 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
}
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
+ return inlineCalls(SCC);
+}
+
+bool Inliner::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
+ PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SmallPtrSet<Function*, 8> SCCFunctions;