aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp380
1 files changed, 253 insertions, 127 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6baada2c1ae1..f0e781b9d923 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -23,39 +23,65 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/SampleProfile.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include <cctype>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace sampleprof;
@@ -67,34 +93,39 @@ using namespace sampleprof;
static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+
static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
"sample block/edge weights through the CFG."));
+
static cl::opt<unsigned> SampleProfileRecordCoverage(
"sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
cl::desc("Emit a warning if less than N% of records in the input profile "
"are matched to the IR."));
+
static cl::opt<unsigned> SampleProfileSampleCoverage(
"sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
cl::desc("Emit a warning if less than N% of samples in the input profile "
"are matched to the IR."));
+
static cl::opt<double> SampleProfileHotThreshold(
"sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
cl::desc("Inlined functions that account for more than N% of all samples "
"collected in the parent function, will be inlined again."));
namespace {
-typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
-typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
-typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
-typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
-typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
- BlockEdgeMap;
+
+using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
+using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
+using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
+using EdgeWeightMap = DenseMap<Edge, uint64_t>;
+using BlockEdgeMap =
+ DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
class SampleCoverageTracker {
public:
- SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+ SampleCoverageTracker() = default;
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -103,15 +134,16 @@ public:
unsigned countBodyRecords(const FunctionSamples *FS) const;
uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
uint64_t countBodySamples(const FunctionSamples *FS) const;
+
void clear() {
SampleCoverage.clear();
TotalUsedSamples = 0;
}
private:
- typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
- typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
- FunctionSamplesCoverageMap;
+ using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
+ using FunctionSamplesCoverageMap =
+ DenseMap<const FunctionSamples *, BodySampleCoverageMap>;
/// Coverage map for sampling records.
///
@@ -135,7 +167,7 @@ private:
/// and all the inlined callsites. Strictly, we should have a map of counters
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
- uint64_t TotalUsedSamples;
+ uint64_t TotalUsedSamples = 0;
};
/// \brief Sample profile pass.
@@ -145,29 +177,31 @@ private:
/// profile information found in that file.
class SampleProfileLoader {
public:
- SampleProfileLoader(StringRef Name = SampleProfileFile)
- : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(),
- Samples(nullptr), Filename(Name), ProfileIsValid(false),
- TotalCollectedSamples(0) {}
+ SampleProfileLoader(
+ StringRef Name, bool IsThinLTOPreLink,
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache,
+ std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
+ : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
+ Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
- bool runOnModule(Module &M);
- void setACT(AssumptionCacheTracker *A) { ACT = A; }
+ bool runOnModule(Module &M, ModuleAnalysisManager *AM);
void dump() { Reader->dump(); }
protected:
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
- findIndirectCallFunctionSamples(const Instruction &I) const;
+ findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
- DenseSet<GlobalValue::GUID> &ImportGUIDs);
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -222,7 +256,8 @@ protected:
std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
- AssumptionCacheTracker *ACT;
+ std::function<AssumptionCache &(Function &)> GetAC;
+ std::function<TargetTransformInfo &(Function &)> GetTTI;
/// \brief Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -236,19 +271,28 @@ protected:
std::unique_ptr<SampleProfileReader> Reader;
/// \brief Samples collected for the body of this function.
- FunctionSamples *Samples;
+ FunctionSamples *Samples = nullptr;
/// \brief Name of the profile file to load.
std::string Filename;
/// \brief Flag indicating whether the profile input loaded successfully.
- bool ProfileIsValid;
+ bool ProfileIsValid = false;
+
+ /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase.
+ ///
+ /// In this phase, in annotation, we should not promote indirect calls.
+ /// Instead, we will mark GUIDs that need to be annotated to the function.
+ bool IsThinLTOPreLink;
/// \brief Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
/// at runtime.
- uint64_t TotalCollectedSamples;
+ uint64_t TotalCollectedSamples = 0;
+
+ /// \brief Optimization Remark Emitter used to emit diagnostic remarks.
+ OptimizationRemarkEmitter *ORE = nullptr;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -256,8 +300,15 @@ public:
// Class identification, replacement for typeinfo
static char ID;
- SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile)
- : ModulePass(ID), SampleLoader(Name) {
+ SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
+ bool IsThinLTOPreLink = false)
+ : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -267,17 +318,23 @@ public:
bool doInitialization(Module &M) override {
return SampleLoader.doInitialization(M);
}
+
StringRef getPassName() const override { return "Sample profile pass"; }
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
SampleProfileLoader SampleLoader;
+ AssumptionCacheTracker *ACT = nullptr;
+ TargetTransformInfoWrapperPass *TTIWP = nullptr;
};
+} // end anonymous namespace
+
/// Return true if the given callsite is hot with respect to its caller.
///
/// Functions that were inlined in the original binary will be represented
@@ -292,8 +349,8 @@ private:
///
/// If that fraction is larger than the default given by
/// SampleProfileHotThreshold, the callsite will be inlined again.
-bool callsiteIsHot(const FunctionSamples *CallerFS,
- const FunctionSamples *CallsiteFS) {
+static bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
@@ -309,7 +366,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS,
(double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
return PercentSamples >= SampleProfileHotThreshold;
}
-}
/// Mark as used the sample record for the given function samples at
/// (LineOffset, Discriminator).
@@ -423,6 +479,7 @@ unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
0xffff;
}
+#ifndef NDEBUG
/// \brief Print the weight of edge \p E on stream \p OS.
///
/// \param OS Stream to emit the output to.
@@ -453,6 +510,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
OS << "weight[" << BB->getName() << "]: " << W << "\n";
}
+#endif
/// \brief Get the weight for an instruction.
///
@@ -480,10 +538,12 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
return std::error_code();
- // If a call/invoke instruction is inlined in profile, but not inlined here,
+ // If a direct call/invoke instruction is inlined in profile
+ // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ !ImmutableCallSite(&Inst).isIndirectCall() &&
findCalleeFunctionSamples(Inst))
return 0;
@@ -495,13 +555,18 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
if (FirstMark) {
- const Function *F = Inst.getParent()->getParent();
- LLVMContext &Ctx = F->getContext();
- emitOptimizationRemark(
- Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) +
- " samples from profile (offset: " + Twine(LineOffset) +
- ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ ORE->emit([&]() {
+ OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+ Remark << "Applied " << ore::NV("NumSamples", *R);
+ Remark << " samples from profile (offset: ";
+ Remark << ore::NV("LineOffset", LineOffset);
+ if (Discriminator) {
+ Remark << ".";
+ Remark << ore::NV("Discriminator", Discriminator);
+ }
+ Remark << ")";
+ return Remark;
+ });
}
DEBUG(dbgs() << " " << DLoc.getLine() << "."
<< DIL->getBaseDiscriminator() << ":" << Inst
@@ -588,10 +653,11 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
}
/// Returns a vector of FunctionSamples that are the indirect call targets
-/// of \p Inst. The vector is sorted by the total number of samples.
+/// of \p Inst. The vector is sorted by the total number of samples. Stores
+/// the total call count of the indirect call in \p Sum.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
- const Instruction &Inst) const {
+ const Instruction &Inst, uint64_t &Sum) const {
const DILocation *DIL = Inst.getDebugLoc();
std::vector<const FunctionSamples *> R;
@@ -603,16 +669,25 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
if (FS == nullptr)
return R;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ Sum = 0;
+ if (T)
+ for (const auto &T_C : T.get())
+ Sum += T_C.second;
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
- if (M->size() == 0)
+ if (M->empty())
return R;
for (const auto &NameFS : *M) {
+ Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
std::sort(R.begin(), R.end(),
[](const FunctionSamples *L, const FunctionSamples *R) {
- return L->getTotalSamples() > R->getTotalSamples();
+ return L->getEntrySamples() > R->getEntrySamples();
});
}
return R;
@@ -650,6 +725,39 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}
+bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
+ assert(isa<CallInst>(I) || isa<InvokeInst>(I));
+ CallSite CS(I);
+ Function *CalledFunction = CS.getCalledFunction();
+ assert(CalledFunction);
+ DebugLoc DLoc = I->getDebugLoc();
+ BasicBlock *BB = I->getParent();
+ InlineParams Params = getInlineParams();
+ Params.ComputeFullInlineCost = true;
+ // Checks if there is anything in the reachable portion of the callee at
+ // this callsite that makes this inlining potentially illegal. Need to
+ // set ComputeFullInlineCost, otherwise getInlineCost may return early
+ // when cost exceeds threshold without checking all IRs in the callee.
+ // The actual cost does not matter because we only check isNever() to
+ // see if it is legal to inline the callsite.
+ InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
+ None, nullptr, nullptr);
+ if (Cost.isNever()) {
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
+ << "incompatible inlining");
+ return false;
+ }
+ InlineFunctionInfo IFI(nullptr, &GetAC);
+ if (InlineFunction(CS, IFI)) {
+ // The call to InlineFunction erases I, so we can't pass it here.
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
+ << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
+ << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
+ return true;
+ }
+ return false;
+}
+
/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -659,17 +767,14 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
-/// \param ImportGUIDs a set to be updated to include all GUIDs that come
-/// from a different module but inlined in the profiled binary.
+/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
+/// inlined in the profiled binary.
///
/// \returns True if any inlining happened.
bool SampleProfileLoader::inlineHotFunctions(
- Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
- LLVMContext &Ctx = F.getContext();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
- Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
while (true) {
bool LocalChanged = false;
SmallVector<Instruction *, 10> CIS;
@@ -690,57 +795,59 @@ bool SampleProfileLoader::inlineHotFunctions(
}
}
for (auto I : CIS) {
- InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
Function *CalledFunction = CallSite(I).getCalledFunction();
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
- Instruction *DI = I;
- if (!CalledFunction && !PromotedInsns.count(I) &&
- CallSite(I).isIndirectCall())
- for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ if (CallSite(I).isIndirectCall()) {
+ if (PromotedInsns.count(I))
+ continue;
+ uint64_t Sum;
+ for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+ if (IsThinLTOPreLink) {
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ Samples->getTotalSamples() *
+ SampleProfileHotThreshold / 100);
+ continue;
+ }
auto CalleeFunctionName = FS->getName();
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
- // recursive. As llvm does not inline recursive calls, we will simply
- // ignore it instead of handling it explicitly.
+ // recursive. As llvm does not inline recursive calls, we will
+ // simply ignore it instead of handling it explicitly.
if (CalleeFunctionName == F.getName())
continue;
+
const char *Reason = "Callee function not available";
auto R = SymbolMap.find(CalleeFunctionName);
- if (R == SymbolMap.end())
- continue;
- CalledFunction = R->getValue();
- if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
- // The indirect target was promoted and inlined in the profile, as a
- // result, we do not have profile info for the branch probability.
- // We set the probability to 80% taken to indicate that the static
- // call is likely taken.
- DI = dyn_cast<Instruction>(
- promoteIndirectCall(I, CalledFunction, 80, 100, false)
- ->stripPointerCasts());
+ if (R != SymbolMap.end() && R->getValue() &&
+ !R->getValue()->isDeclaration() &&
+ R->getValue()->getSubprogram() &&
+ isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
+ uint64_t C = FS->getEntrySamples();
+ Instruction *DI =
+ pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
+ Sum -= C;
PromotedInsns.insert(I);
+ // If profile mismatches, we should not attempt to inline DI.
+ if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
+ inlineCallInstruction(DI))
+ LocalChanged = true;
} else {
- DEBUG(dbgs() << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason
- << "\n");
- continue;
+ DEBUG(dbgs()
+ << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason << "\n");
}
}
- if (!CalledFunction || !CalledFunction->getSubprogram()) {
- findCalleeFunctionSamples(*I)->findImportedFunctions(
- ImportGUIDs, F.getParent(),
+ } else if (CalledFunction && CalledFunction->getSubprogram() &&
+ !CalledFunction->isDeclaration()) {
+ if (inlineCallInstruction(I))
+ LocalChanged = true;
+ } else if (IsThinLTOPreLink) {
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(),
Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
- continue;
- }
- DebugLoc DLoc = I->getDebugLoc();
- if (InlineFunction(CallSite(DI), IFI)) {
- LocalChanged = true;
- emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
- Twine("inlined hot callee '") +
- CalledFunction->getName() + "' into '" +
- F.getName() + "'");
}
}
if (LocalChanged) {
@@ -1076,24 +1183,20 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
-/// Sorts the CallTargetMap \p M by count in descending order and stores the
-/// sorted result in \p Sorted. Returns the total counts.
-static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
- const SampleRecord::CallTargetMap &M) {
- Sorted.clear();
- uint64_t Sum = 0;
- for (auto I = M.begin(); I != M.end(); ++I) {
- Sum += I->getValue();
- Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
- }
- std::sort(Sorted.begin(), Sorted.end(),
+/// Returns the sorted CallTargetMap \p M by count in descending order.
+static SmallVector<InstrProfValueData, 2> SortCallTargets(
+ const SampleRecord::CallTargetMap &M) {
+ SmallVector<InstrProfValueData, 2> R;
+ for (auto I = M.begin(); I != M.end(); ++I)
+ R.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ std::sort(R.begin(), R.end(),
[](const InstrProfValueData &L, const InstrProfValueData &R) {
if (L.Count == R.Count)
return L.Value > R.Value;
else
return L.Count > R.Count;
});
- return Sum;
+ return R;
}
/// \brief Propagate weights into edges
@@ -1184,10 +1287,12 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!FS)
continue;
auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
- if (!T || T.get().size() == 0)
+ if (!T || T.get().empty())
continue;
- SmallVector<InstrProfValueData, 2> SortedCallTargets;
- uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ SmallVector<InstrProfValueData, 2> SortedCallTargets =
+ SortCallTargets(T.get());
+ uint64_t Sum;
+ findIndirectCallFunctionSamples(I, Sum);
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
SortedCallTargets, Sum, IPVK_IndirectCallTarget,
SortedCallTargets.size());
@@ -1211,7 +1316,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
<< ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
- DebugLoc MaxDestLoc;
+ Instruction *MaxDestInst;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
@@ -1230,7 +1335,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
- MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
}
}
}
@@ -1243,15 +1348,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// weights, the second pass does not need to set it.
if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
- TI->setMetadata(llvm::LLVMContext::MD_prof,
+ TI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
- emitOptimizationRemark(
- Ctx, DEBUG_TYPE, F, MaxDestLoc,
- Twine("most popular destination for conditional branches at ") +
- ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
- Twine(BranchLoc.getLine()) + ":" +
- Twine(BranchLoc.getCol()))
- : Twine("<UNKNOWN LOCATION>")));
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
+ << "most popular destination for conditional branches at "
+ << ore::NV("CondBranchesLoc", BranchLoc);
+ });
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
@@ -1351,18 +1454,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- DenseSet<GlobalValue::GUID> ImportGUIDs;
- Changed |= inlineHotFunctions(F, ImportGUIDs);
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
// Add an entry count to the function using the samples gathered at the
- // function entry. Also sets the GUIDs that comes from a different
- // module but inlined in the profiled binary. This is aiming at making
- // the IR match the profiled binary before annotation.
- F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+ // function entry.
+ // Sets the GUIDs that are inlined in the profiled binary. This is used
+ // for ThinLink to make correct liveness analysis, and also make the IR
+ // match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1404,9 +1508,11 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
char SampleProfileLoaderLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
@@ -1431,7 +1537,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoaderLegacyPass(Name);
}
-bool SampleProfileLoader::runOnModule(Module &M) {
+bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
if (!ProfileIsValid)
return false;
@@ -1463,7 +1569,7 @@ bool SampleProfileLoader::runOnModule(Module &M) {
for (auto &F : M)
if (!F.isDeclaration()) {
clearFunctionData();
- retval |= runOnFunction(F);
+ retval |= runOnFunction(F, AM);
}
if (M.getProfileSummary() == nullptr)
M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
@@ -1471,13 +1577,23 @@ bool SampleProfileLoader::runOnModule(Module &M) {
}
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
- // FIXME: pass in AssumptionCache correctly for the new pass manager.
- SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
- return SampleLoader.runOnModule(M);
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ return SampleLoader.runOnModule(M, nullptr);
}
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
F.setEntryCount(0);
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+ if (AM) {
+ auto &FAM =
+ AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
+ .getManager();
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ } else {
+ OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = OwnedORE.get();
+ }
Samples = Reader->getSamplesFor(F);
if (Samples && !Samples->empty())
return emitAnnotations(F);
@@ -1486,13 +1602,23 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
SampleProfileLoader SampleLoader(
- ProfileFileName.empty() ? SampleProfileFile : ProfileFileName);
+ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
+ IsThinLTOPreLink, GetAssumptionCache, GetTTI);
SampleLoader.doInitialization(M);
- if (!SampleLoader.runOnModule(M))
+ if (!SampleLoader.runOnModule(M, &AM))
return PreservedAnalyses::all();
return PreservedAnalyses::none();