about summary refs log tree commit diff
path: root/llvm/lib/Analysis/InlineCost.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Analysis/InlineCost.cpp')
-rw-r--r--  llvm/lib/Analysis/InlineCost.cpp  479
1 file changed, 373 insertions(+), 106 deletions(-)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index de83a48aad16..33d714406d7f 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -24,9 +24,11 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -38,6 +40,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -46,6 +49,15 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+static cl::opt<int>
+ DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
+ cl::ZeroOrMore,
+ cl::desc("Default amount of inlining to perform"));
+
+static cl::opt<bool> PrintInstructionComments(
+ "print-instruction-comments", cl::Hidden, cl::init(false),
+ cl::desc("Prints comments for instruction based on inline cost analysis"));
+
static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -92,8 +104,52 @@ static cl::opt<bool> OptComputeFullInlineCost(
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
+static cl::opt<bool> InlineCallerSupersetNoBuiltin(
+ "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
+ cl::ZeroOrMore,
+ cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
+ "attributes."));
+
+static cl::opt<bool> DisableGEPConstOperand(
+ "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
+ cl::desc("Disables evaluation of GetElementPtr with constant operands"));
+
namespace {
class InlineCostCallAnalyzer;
+
+// This struct is used to store information about inline cost of a
+// particular instruction
+struct InstructionCostDetail {
+ int CostBefore = 0;
+ int CostAfter = 0;
+ int ThresholdBefore = 0;
+ int ThresholdAfter = 0;
+
+ int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }
+
+ int getCostDelta() const { return CostAfter - CostBefore; }
+
+ bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
+};
+
+class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
+private:
+ InlineCostCallAnalyzer *const ICCA;
+
+public:
+ InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override;
+};
+
+/// Carry out call site analysis, in order to evaluate inlinability.
+/// NOTE: the type is currently used as implementation detail of functions such
+/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
+/// expectation is that they come from the outer scope, from the wrapper
+/// functions. If we want to support constructing CallAnalyzer objects where
+/// lambdas are provided inline at construction, or where the object needs to
+/// otherwise survive past the scope of the provided functions, we need to
+/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
@@ -104,10 +160,10 @@ protected:
const TargetTransformInfo &TTI;
/// Getter for the cache of @llvm.assume intrinsics.
- std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
/// Getter for BlockFrequencyInfo
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
/// Profile summary information.
ProfileSummaryInfo *PSI;
@@ -130,11 +186,16 @@ protected:
/// Called after a basic block was analyzed.
virtual void onBlockAnalyzed(const BasicBlock *BB) {}
+ /// Called before an instruction was analyzed
+ virtual void onInstructionAnalysisStart(const Instruction *I) {}
+
+ /// Called after an instruction was analyzed
+ virtual void onInstructionAnalysisFinish(const Instruction *I) {}
+
/// Called at the end of the analysis of the callsite. Return the outcome of
/// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
/// the reason it can't.
- virtual InlineResult finalizeAnalysis() { return true; }
-
+ virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
/// Called when we're about to start processing a basic block, and every time
/// we are done processing an instruction. Return true if there is no point in
/// continuing the analysis (e.g. we've determined already the call site is
@@ -145,8 +206,7 @@ protected:
/// contexts propagated). It checks callsite-specific information. Return a
/// reason analysis can't continue if that's the case, or 'true' if it may
/// continue.
- virtual InlineResult onAnalysisStart() { return true; }
-
+ virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
/// Called if the analysis engine decides SROA cannot be done for the given
/// alloca.
virtual void onDisableSROA(AllocaInst *Arg) {}
@@ -187,7 +247,7 @@ protected:
/// Called to account for any other instruction not specifically accounted
/// for.
- virtual void onCommonInstructionSimplification() {}
+ virtual void onMissedSimplification() {}
/// Start accounting potential benefits due to SROA for the given alloca.
virtual void onInitializeSROAArg(AllocaInst *Arg) {}
@@ -236,9 +296,7 @@ protected:
DenseMap<Value *, AllocaInst *> SROAArgValues;
/// Keep track of Allocas for which we believe we may get SROA optimization.
- /// We don't delete entries in SROAArgValue because we still want
- /// isAllocaDerivedArg to function correctly.
- DenseSet<AllocaInst *> EnabledSROAArgValues;
+ DenseSet<AllocaInst *> EnabledSROAAllocas;
/// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
@@ -258,8 +316,7 @@ protected:
AllocaInst *getSROAArgForValueOrNull(Value *V) const {
auto It = SROAArgValues.find(V);
- if (It == SROAArgValues.end() ||
- EnabledSROAArgValues.count(It->second) == 0)
+ if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
return nullptr;
return It->second;
}
@@ -337,17 +394,24 @@ protected:
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const TargetTransformInfo &TTI,
- std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
- ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
- Function &Callee, CallBase &Call)
+ CallAnalyzer(
+ Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+ ProfileSummaryInfo *PSI = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
CandidateCall(Call), EnableLoadElimination(true) {}
InlineResult analyze();
+ Optional<Constant*> getSimplifiedValue(Instruction *I) {
+ if (SimplifiedValues.find(I) != SimplifiedValues.end())
+ return SimplifiedValues[I];
+ return None;
+ }
+
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
unsigned NumConstantArgs = 0;
@@ -375,6 +439,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// Tunable parameters that control the analysis.
const InlineParams &Params;
+ // This DenseMap stores the delta change in cost and threshold after
+ // accounting for the given instruction. The map is filled only with the
+ // flag PrintInstructionComments on.
+ DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
+
/// Upper bound for the inlining cost. Bonuses are being applied to account
/// for speculative "expected profit" of the inlining decision.
int Threshold = 0;
@@ -382,6 +451,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// Attempt to evaluate indirect calls to boost its inline cost.
const bool BoostIndirectCalls;
+ /// Ignore the threshold when finalizing analysis.
+ const bool IgnoreThreshold;
+
/// Inlining cost measured in abstract units, accounts for all the
/// instructions expected to be executed for a given function invocation.
/// Instructions that are statically proven to be dead based on call-site
@@ -456,9 +528,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
InlineConstants::IndirectCallThreshold;
/// FIXME: if InlineCostCallAnalyzer is derived from, this may need
/// to instantiate the derived class.
- InlineCostCallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F,
- Call, IndirectCallParams, false);
- if (CA.analyze()) {
+ InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
+ GetAssumptionCache, GetBFI, PSI, ORE, false);
+ if (CA.analyze().isSuccess()) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
Cost -= std::max(0, CA.getThreshold() - CA.getCost());
@@ -507,7 +579,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
addCost(SwitchCost, (int64_t)CostUpperBound);
}
- void onCommonInstructionSimplification() override {
+ void onMissedSimplification() override {
addCost(InlineConstants::InstrCost);
}
@@ -515,7 +587,6 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
assert(Arg != nullptr &&
"Should not initialize SROA costs for null value.");
SROAArgCosts[Arg] = 0;
- EnabledSROAArgValues.insert(Arg);
}
void onAggregateSROAUse(AllocaInst *SROAArg) override {
@@ -538,6 +609,25 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
SingleBB = false;
}
}
+
+ void onInstructionAnalysisStart(const Instruction *I) override {
+ // This function is called to store the initial cost of inlining before
+ // the given instruction was assessed.
+ if (!PrintInstructionComments)
+ return;
+ InstructionCostDetailMap[I].CostBefore = Cost;
+ InstructionCostDetailMap[I].ThresholdBefore = Threshold;
+ }
+
+ void onInstructionAnalysisFinish(const Instruction *I) override {
+ // This function is called to find new values of cost and threshold after
+ // the instruction has been assessed.
+ if (!PrintInstructionComments)
+ return;
+ InstructionCostDetailMap[I].CostAfter = Cost;
+ InstructionCostDetailMap[I].ThresholdAfter = Threshold;
+ }
+
InlineResult finalizeAnalysis() override {
// Loops generally act a lot like calls in that they act like barriers to
// movement, require a certain amount of setup, etc. So when optimising for
@@ -566,12 +656,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= VectorBonus / 2;
- return Cost < std::max(1, Threshold);
+ if (IgnoreThreshold || Cost < std::max(1, Threshold))
+ return InlineResult::success();
+ return InlineResult::failure("Cost over threshold.");
}
bool shouldStop() override {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- return Cost >= Threshold && !ComputeFullInlineCost;
+ return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
}
void onLoadEliminationOpportunity() override {
@@ -618,25 +710,42 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost >= Threshold && !ComputeFullInlineCost)
- return "high cost";
+ return InlineResult::failure("high cost");
- return true;
+ return InlineResult::success();
}
public:
InlineCostCallAnalyzer(
+ Function &Callee, CallBase &Call, const InlineParams &Params,
const TargetTransformInfo &TTI,
- std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
- ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
- CallBase &Call, const InlineParams &Params, bool BoostIndirect = true)
- : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call),
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+ ProfileSummaryInfo *PSI = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
+ bool IgnoreThreshold = false)
+ : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
ComputeFullInlineCost(OptComputeFullInlineCost ||
Params.ComputeFullInlineCost || ORE),
Params(Params), Threshold(Params.DefaultThreshold),
- BoostIndirectCalls(BoostIndirect) {}
+ BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
+ Writer(this) {}
+
+ /// Annotation Writer for instruction details
+ InlineCostAnnotationWriter Writer;
+
void dump();
+ // Prints the same analysis as dump(), but its definition is not dependent
+ // on the build.
+ void print();
+
+ Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
+ if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
+ return InstructionCostDetailMap[I];
+ return None;
+ }
+
virtual ~InlineCostCallAnalyzer() {}
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
@@ -650,9 +759,35 @@ bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
onDisableSROA(SROAArg);
- EnabledSROAArgValues.erase(SROAArg);
+ EnabledSROAAllocas.erase(SROAArg);
disableLoadElimination();
}
+
+void InlineCostAnnotationWriter::emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ // The cost of inlining of the given instruction is printed always.
+ // The threshold delta is printed only when it is non-zero. It happens
+ // when we decided to give a bonus at a particular instruction.
+ Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
+ if (!Record)
+ OS << "; No analysis for the instruction";
+ else {
+ OS << "; cost before = " << Record->CostBefore
+ << ", cost after = " << Record->CostAfter
+ << ", threshold before = " << Record->ThresholdBefore
+ << ", threshold after = " << Record->ThresholdAfter << ", ";
+ OS << "cost delta = " << Record->getCostDelta();
+ if (Record->hasThresholdChanged())
+ OS << ", threshold delta = " << Record->getThresholdDelta();
+ }
+ auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
+ if (C) {
+ OS << ", simplified to ";
+ C.getValue()->print(OS, true);
+ }
+ OS << "\n";
+}
+
/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
@@ -711,7 +846,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
Operands.push_back(SimpleOp);
else
Operands.push_back(*I);
- return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getUserCost(&GEP, Operands,
+ TargetTransformInfo::TCK_SizeAndLatency);
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
@@ -720,10 +857,22 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (I.isArrayAllocation()) {
Constant *Size = SimplifiedValues.lookup(I.getArraySize());
if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
+ // Sometimes a dynamic alloca could be converted into a static alloca
+ // after this constant prop, and become a huge static alloca on an
+ // unconditional CFG path. Avoid inlining if this is going to happen above
+ // a threshold.
+ // FIXME: If the threshold is removed or lowered too much, we could end up
+ // being too pessimistic and prevent inlining non-problematic code. This
+ // could result in unintended perf regressions. A better overall strategy
+ // is needed to track stack usage during inlining.
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
AllocatedSize);
+ if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) {
+ HasDynamicAlloca = true;
+ return false;
+ }
return Base::visitAlloca(I);
}
}
@@ -874,6 +1023,16 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
return true;
};
+ if (!DisableGEPConstOperand)
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ SmallVector<Constant *, 2> Indices;
+ for (unsigned int Index = 1 ; Index < COps.size() ; ++Index)
+ Indices.push_back(COps[Index]);
+ return ConstantExpr::getGetElementPtr(I.getSourceElementType(), COps[0],
+ Indices, I.isInBounds());
+ }))
+ return true;
+
if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
if (SROAArg)
SROAArgValues[&I] = SROAArg;
@@ -959,7 +1118,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
SROAArgValues[&I] = SROAArg;
- return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
@@ -983,7 +1143,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
if (auto *SROAArg = getSROAArgForValueOrNull(Op))
SROAArgValues[&I] = SROAArg;
- return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
@@ -993,7 +1154,8 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
}))
return true;
- // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ // Disable SROA in the face of arbitrary casts we don't explicitly list
+ // elsewhere.
disableSROA(I.getOperand(0));
// If this is a floating-point cast, and the target says this operation
@@ -1013,7 +1175,8 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
break;
}
- return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
@@ -1085,7 +1248,7 @@ bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
// If global profile summary is available, then callsite's coldness is
// determined based on that.
if (PSI && PSI->hasProfileSummary())
- return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
+ return PSI->isColdCallSite(Call, CallerBFI);
// Otherwise we need BFI to be available.
if (!CallerBFI)
@@ -1109,8 +1272,7 @@ InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
// If global profile summary is available, then callsite's hotness is
// determined based on that.
- if (PSI && PSI->hasProfileSummary() &&
- PSI->isHotCallSite(CallSite(&Call), CallerBFI))
+ if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
return Params.HotCallSiteThreshold;
// Otherwise we need BFI to be available and to have a locally hot callsite
@@ -1200,7 +1362,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// Callsite hotness and coldness can be determined if sample profile is
// used (which adds hotness metadata to calls) or if caller's
// BlockFrequencyInfo is available.
- BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
+ BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
if (!Caller->hasOptSize() && HotCallSiteThreshold) {
LLVM_DEBUG(dbgs() << "Hot callsite.\n");
@@ -1667,7 +1829,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// does not (yet) fire.
unsigned JumpTableSize = 0;
- BlockFrequencyInfo *BFI = GetBFI ? &((*GetBFI)(F)) : nullptr;
+ BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
unsigned NumCaseCluster =
TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
@@ -1716,7 +1878,8 @@ bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
- if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
+ if (TargetTransformInfo::TCC_Free ==
+ TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
return true;
// We found something we don't understand or can't handle. Mark any SROA-able
@@ -1761,33 +1924,36 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// all of the per-instruction logic. The visit tree returns true if we
// consumed the instruction in any way, and false if the instruction's base
// cost should count against inlining.
+ onInstructionAnalysisStart(&*I);
+
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
- onCommonInstructionSimplification();
+ onMissedSimplification();
+ onInstructionAnalysisFinish(&*I);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
- InlineResult IR;
+ InlineResult IR = InlineResult::success();
if (IsRecursiveCall)
- IR = "recursive";
+ IR = InlineResult::failure("recursive");
else if (ExposesReturnsTwice)
- IR = "exposes returns twice";
+ IR = InlineResult::failure("exposes returns twice");
else if (HasDynamicAlloca)
- IR = "dynamic alloca";
+ IR = InlineResult::failure("dynamic alloca");
else if (HasIndirectBr)
- IR = "indirect branch";
+ IR = InlineResult::failure("indirect branch");
else if (HasUninlineableIntrinsic)
- IR = "uninlinable intrinsic";
+ IR = InlineResult::failure("uninlinable intrinsic");
else if (InitsVargArgs)
- IR = "varargs";
- if (!IR) {
+ IR = InlineResult::failure("varargs");
+ if (!IR.isSuccess()) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
&CandidateCall)
<< NV("Callee", &F) << " has uninlinable pattern ("
- << NV("InlineResult", IR.message)
+ << NV("InlineResult", IR.getFailureReason())
<< ") and cost is not fully computed";
});
return IR;
@@ -1798,22 +1964,25 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// the caller stack usage dramatically.
if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
- InlineResult IR = "recursive and allocates too much stack space";
+ auto IR =
+ InlineResult::failure("recursive and allocates too much stack space");
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
&CandidateCall)
- << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
+ << NV("Callee", &F) << " is "
+ << NV("InlineResult", IR.getFailureReason())
<< ". Cost is not fully computed";
});
return IR;
}
if (shouldStop())
- return false;
+ return InlineResult::failure(
+ "Call site analysis is not favorable to inlining.");
}
- return true;
+ return InlineResult::success();
}
/// Compute the base pointer and cumulative constant offsets for V.
@@ -1904,11 +2073,11 @@ InlineResult CallAnalyzer::analyze() {
++NumCallsAnalyzed;
auto Result = onAnalysisStart();
- if (!Result)
+ if (!Result.isSuccess())
return Result;
if (F.empty())
- return true;
+ return InlineResult::success();
Function *Caller = CandidateCall.getFunction();
// Check if the caller function is recursive itself.
@@ -1937,6 +2106,7 @@ InlineResult CallAnalyzer::analyze() {
if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
SROAArgValues[&*FAI] = SROAArg;
onInitializeSROAArg(SROAArg);
+ EnabledSROAAllocas.insert(SROAArg);
}
}
}
@@ -1983,12 +2153,12 @@ InlineResult CallAnalyzer::analyze() {
if (BB->hasAddressTaken())
for (User *U : BlockAddress::get(&*BB)->users())
if (!isa<CallBrInst>(*U))
- return "blockaddress used outside of callbr";
+ return InlineResult::failure("blockaddress used outside of callbr");
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
InlineResult IR = analyzeBlock(BB, EphValues);
- if (!IR)
+ if (!IR.isSuccess())
return IR;
Instruction *TI = BB->getTerminator();
@@ -2034,15 +2204,15 @@ InlineResult CallAnalyzer::analyze() {
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
- return "noduplicate";
+ return InlineResult::failure("noduplicate");
return finalizeAnalysis();
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// Dump stats about this call's analysis.
-LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
+void InlineCostCallAnalyzer::print() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
+ if (PrintInstructionComments)
+ F.print(dbgs(), &Writer);
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
DEBUG_PRINT_STAT(NumAllocaArgs);
@@ -2058,14 +2228,27 @@ LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Dump stats about this call's analysis.
+LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
+ print();
+}
#endif
/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
-static bool functionsHaveCompatibleAttributes(Function *Caller,
- Function *Callee,
- TargetTransformInfo &TTI) {
+static bool functionsHaveCompatibleAttributes(
+ Function *Caller, Function *Callee, TargetTransformInfo &TTI,
+ function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
+ // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
+ // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
+ // object, and always returns the same object (which is overwritten on each
+ // GetTLI call). Therefore we copy the first result.
+ auto CalleeTLI = GetTLI(*Callee);
return TTI.areInlineCompatible(Caller, Callee) &&
+ GetTLI(*Caller).areInlineCompatible(CalleeTLI,
+ InlineCallerSupersetNoBuiltin) &&
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
@@ -2104,23 +2287,46 @@ int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
InlineCost llvm::getInlineCost(
CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
- std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
- GetAssumptionCache, GetBFI, PSI, ORE);
+ GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
}
-InlineCost llvm::getInlineCost(
- CallBase &Call, Function *Callee, const InlineParams &Params,
- TargetTransformInfo &CalleeTTI,
- std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+Optional<int> llvm::getInliningCostEstimate(
+ CallBase &Call, TargetTransformInfo &CalleeTTI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
+ const InlineParams Params = {/* DefaultThreshold*/ 0,
+ /*HintThreshold*/ {},
+ /*ColdThreshold*/ {},
+ /*OptSizeThreshold*/ {},
+ /*OptMinSizeThreshold*/ {},
+ /*HotCallSiteThreshold*/ {},
+ /*LocallyHotCallSiteThreshold*/ {},
+ /*ColdCallSiteThreshold*/ {},
+ /*ComputeFullInlineCost*/ true,
+ /*EnableDeferral*/ true};
+
+ InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
+ GetAssumptionCache, GetBFI, PSI, ORE, true,
+ /*IgnoreThreshold*/ true);
+ auto R = CA.analyze();
+ if (!R.isSuccess())
+ return None;
+ return CA.getCost();
+}
+
+Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
+ CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
// Cannot inline indirect calls.
if (!Callee)
- return llvm::InlineCost::getNever("indirect call");
+ return InlineResult::failure("indirect call");
// Never inline calls with byval arguments that does not have the alloca
// address space. Since byval arguments can be replaced with a copy to an
@@ -2132,59 +2338,80 @@ InlineCost llvm::getInlineCost(
if (Call.isByValArgument(I)) {
PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
- return llvm::InlineCost::getNever("byval arguments without alloca"
- " address space");
+ return InlineResult::failure("byval arguments without alloca"
+ " address space");
}
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (Call.hasFnAttr(Attribute::AlwaysInline)) {
auto IsViable = isInlineViable(*Callee);
- if (IsViable)
- return llvm::InlineCost::getAlways("always inline attribute");
- return llvm::InlineCost::getNever(IsViable.message);
+ if (IsViable.isSuccess())
+ return InlineResult::success();
+ return InlineResult::failure(IsViable.getFailureReason());
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
Function *Caller = Call.getCaller();
- if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
- return llvm::InlineCost::getNever("conflicting attributes");
+ if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
+ return InlineResult::failure("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
if (Caller->hasOptNone())
- return llvm::InlineCost::getNever("optnone attribute");
+ return InlineResult::failure("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
// that does not have this attribute.
if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
- return llvm::InlineCost::getNever("nullptr definitions incompatible");
+ return InlineResult::failure("nullptr definitions incompatible");
// Don't inline functions which can be interposed at link-time.
if (Callee->isInterposable())
- return llvm::InlineCost::getNever("interposable");
+ return InlineResult::failure("interposable");
// Don't inline functions marked noinline.
if (Callee->hasFnAttribute(Attribute::NoInline))
- return llvm::InlineCost::getNever("noinline function attribute");
+ return InlineResult::failure("noinline function attribute");
// Don't inline call sites marked noinline.
if (Call.isNoInline())
- return llvm::InlineCost::getNever("noinline call site attribute");
+ return InlineResult::failure("noinline call site attribute");
+
+ return None;
+}
+
+InlineCost llvm::getInlineCost(
+ CallBase &Call, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
+
+ auto UserDecision =
+ llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
+
+ if (UserDecision.hasValue()) {
+ if (UserDecision->isSuccess())
+ return llvm::InlineCost::getAlways("always inline attribute");
+ return llvm::InlineCost::getNever(UserDecision->getFailureReason());
+ }
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
- << "... (caller:" << Caller->getName() << ")\n");
+ << "... (caller:" << Call.getCaller()->getName()
+ << ")\n");
- InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE,
- *Callee, Call, Params);
+ InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
+ GetAssumptionCache, GetBFI, PSI, ORE);
InlineResult ShouldInline = CA.analyze();
LLVM_DEBUG(CA.dump());
// Check if there was a reason to force inlining or no inlining.
- if (!ShouldInline && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever(ShouldInline.message);
- if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever(ShouldInline.getFailureReason());
+ if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
return InlineCost::getAlways("empty function");
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
@@ -2195,14 +2422,14 @@ InlineResult llvm::isInlineViable(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches.
if (isa<IndirectBrInst>(BI->getTerminator()))
- return "contains indirect branches";
+ return InlineResult::failure("contains indirect branches");
// Disallow inlining of blockaddresses which are used by non-callbr
// instructions.
if (BI->hasAddressTaken())
for (User *U : BlockAddress::get(&*BI)->users())
if (!isa<CallBrInst>(*U))
- return "blockaddress used outside of callbr";
+ return InlineResult::failure("blockaddress used outside of callbr");
for (auto &II : *BI) {
CallBase *Call = dyn_cast<CallBase>(&II);
@@ -2211,13 +2438,13 @@ InlineResult llvm::isInlineViable(Function &F) {
// Disallow recursive calls.
if (&F == Call->getCalledFunction())
- return "recursive call";
+ return InlineResult::failure("recursive call");
// Disallow calls which expose returns-twice to a function not previously
// attributed as such.
if (!ReturnsTwice && isa<CallInst>(Call) &&
cast<CallInst>(Call)->canReturnTwice())
- return "exposes returns-twice attribute";
+ return InlineResult::failure("exposes returns-twice attribute");
if (Call->getCalledFunction())
switch (Call->getCalledFunction()->getIntrinsicID()) {
@@ -2226,20 +2453,23 @@ InlineResult llvm::isInlineViable(Function &F) {
case llvm::Intrinsic::icall_branch_funnel:
// Disallow inlining of @llvm.icall.branch.funnel because current
// backend can't separate call targets from call arguments.
- return "disallowed inlining of @llvm.icall.branch.funnel";
+ return InlineResult::failure(
+ "disallowed inlining of @llvm.icall.branch.funnel");
case llvm::Intrinsic::localescape:
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
- return "disallowed inlining of @llvm.localescape";
+ return InlineResult::failure(
+ "disallowed inlining of @llvm.localescape");
case llvm::Intrinsic::vastart:
// Disallow inlining of functions that initialize VarArgs with
// va_start.
- return "contains VarArgs initialized with va_start";
+ return InlineResult::failure(
+ "contains VarArgs initialized with va_start");
}
}
}
- return true;
+ return InlineResult::success();
}
// APIs to create InlineParams based on command line flags and/or other
@@ -2299,7 +2529,7 @@ InlineParams llvm::getInlineParams(int Threshold) {
}
InlineParams llvm::getInlineParams() {
- return getInlineParams(InlineThreshold);
+ return getInlineParams(DefaultThreshold);
}
// Compute the default threshold for inlining based on the opt level and the
@@ -2312,7 +2542,7 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
return InlineConstants::OptSizeThreshold;
if (SizeOptLevel == 2) // -Oz
return InlineConstants::OptMinSizeThreshold;
- return InlineThreshold;
+ return DefaultThreshold;
}
InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
@@ -2325,3 +2555,40 @@ InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
return Params;
}
+
+PreservedAnalyses
+InlineCostAnnotationPrinterPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ PrintInstructionComments = true;
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
+ Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ Module *M = F.getParent();
+ ProfileSummaryInfo PSI(*M);
+ DataLayout DL(M);
+ TargetTransformInfo TTI(DL);
+ // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
+ // In the current implementation, the type of InlineParams doesn't matter as
+ // the pass serves only for verification of inliner's decisions.
+ // We can add a flag which determines InlineParams for this run. Right now,
+ // the default InlineParams are used.
+ const InlineParams Params = llvm::getInlineParams();
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ Function *CalledFunction = CI->getCalledFunction();
+ if (!CalledFunction || CalledFunction->isDeclaration())
+ continue;
+ OptimizationRemarkEmitter ORE(CalledFunction);
+ InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
+ GetAssumptionCache, nullptr, &PSI, &ORE);
+ ICCA.analyze();
+ OS << " Analyzing call of " << CalledFunction->getName()
+ << "... (caller:" << CI->getCaller()->getName() << ")\n";
+ ICCA.print();
+ }
+ }
+ }
+ return PreservedAnalyses::all();
+}