//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the interface between the inliner and a learned model. // It delegates model evaluation to either the AOT compiled model (the // 'release' mode) or a runtime-loaded model (the 'development' case). // //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" #if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) #include #include #include #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" using namespace llvm; #ifdef LLVM_HAVE_TF_AOT #include "llvm/Analysis/ReleaseModeModelRunner.h" // codegen-ed file #include "InlinerSizeModel.h" // NOLINT #include "llvm/Analysis/InlineModelFeatureMaps.h" std::unique_ptr llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { auto AOTRunner = std::make_unique>( M.getContext(), FeatureNameMap, DecisionName); return std::make_unique(M, MAM, std::move(AOTRunner)); } #endif #define DEBUG_TYPE "inline-ml" static cl::opt SizeIncreaseThreshold( "ml-advisor-size-increase-threshold", cl::Hidden, cl::desc("Maximum factor by which expected native size may increase before " "blocking any further inlining."), cl::init(2.0)); // clang-format off const std::array llvm::FeatureNameMap{ // InlineCost features - these must come first #define POPULATE_NAMES(INDEX_NAME, NAME) NAME, INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES // Non-cost features #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, INLINE_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES }; // clang-format on const char *const llvm::DecisionName = "inlining_decision"; const char *const llvm::DefaultDecisionName = "inlining_default"; const char *const llvm::RewardName = "delta_size"; CallBase *getInlinableCS(Instruction &I) { if (auto *CS = dyn_cast(&I)) if (Function *Callee = CS->getCalledFunction()) { if (!Callee->isDeclaration()) { return CS; } } return nullptr; } MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, std::unique_ptr Runner) : InlineAdvisor( M, MAM.getResult(M).getManager()), ModelRunner(std::move(Runner)), CG(new CallGraph(M)), InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { assert(ModelRunner); // Extract the 'call site height' feature - the position of a call site // relative to the farthest statically reachable SCC node. We don't mutate // this value while inlining happens. Empirically, this feature proved // critical in behavioral cloning - i.e. training a model to mimic the manual // heuristic's decisions - and, thus, equally important for training for // improvement. for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { const std::vector &CGNodes = *I; unsigned Level = 0; for (auto *CGNode : CGNodes) { Function *F = CGNode->getFunction(); if (!F || F->isDeclaration()) continue; for (auto &I : instructions(F)) { if (auto *CS = getInlinableCS(I)) { auto *Called = CS->getCalledFunction(); auto Pos = FunctionLevels.find(Called); // In bottom up traversal, an inlinable callee is either in the // same SCC, or to a function in a visited SCC. So not finding its // level means we haven't visited it yet, meaning it's in this SCC. if (Pos == FunctionLevels.end()) continue; Level = std::max(Level, Pos->second + 1); } } } for (auto *CGNode : CGNodes) { Function *F = CGNode->getFunction(); if (F && !F->isDeclaration()) FunctionLevels[F] = Level; } } } void MLInlineAdvisor::onPassEntry() { // Function passes executed between InlinerPass runs may have changed the // module-wide features. if (!Invalid) return; NodeCount = 0; EdgeCount = 0; for (auto &F : M) if (!F.isDeclaration()) { ++NodeCount; EdgeCount += getLocalCalls(F); } Invalid = false; } int64_t MLInlineAdvisor::getLocalCalls(Function &F) { return FAM.getResult(F) .DirectCallsToDefinedFunctions; } // Update the internal state of the advisor, and force invalidate feature // analysis. Currently, we maintain minimal (and very simple) global state - the // number of functions and the number of static calls. We also keep track of the // total IR size in this module, to stop misbehaving policies at a certain bloat // factor (SizeIncreaseThreshold) void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, bool CalleeWasDeleted) { assert(!ForceStop); Function *Caller = Advice.getCaller(); Function *Callee = Advice.getCallee(); // The caller features aren't valid anymore. { PreservedAnalyses PA = PreservedAnalyses::all(); PA.abandon(); FAM.invalidate(*Caller, PA); } int64_t IRSizeAfter = getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) ForceStop = true; // We can delta-update module-wide features. We know the inlining only changed // the caller, and maybe the callee (by deleting the latter). // Nodes are simple to update. // For edges, we 'forget' the edges that the caller and callee used to have // before inlining, and add back what they currently have together. int64_t NewCallerAndCalleeEdges = FAM.getResult(*Caller) .DirectCallsToDefinedFunctions; if (CalleeWasDeleted) --NodeCount; else NewCallerAndCalleeEdges += FAM.getResult(*Callee) .DirectCallsToDefinedFunctions; EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); } int64_t MLInlineAdvisor::getModuleIRSize() const { int64_t Ret = 0; for (auto &F : CG->getModule()) if (!F.isDeclaration()) Ret += getIRSize(F); return Ret; } std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &Caller = *CB.getCaller(); auto &Callee = *CB.getCalledFunction(); auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; auto &TIR = FAM.getResult(Callee); auto &ORE = FAM.getResult(Caller); auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which // will do nothing interesting. // Same thing if this is a recursive case. if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || &Caller == &Callee) return getMandatoryAdvice(CB, false); bool Mandatory = MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; // If we need to stop, we won't want to track anymore any state changes, so // we just return the base InlineAdvice, which acts as a noop. if (ForceStop) { ORE.emit([&] { return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) << "Won't attempt inlining because module size grew too much."; }); return std::make_unique(this, CB, ORE, Mandatory); } int CostEstimate = 0; if (!Mandatory) { auto IsCallSiteInlinable = llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); if (!IsCallSiteInlinable) { // We can't inline this for correctness reasons, so return the base // InlineAdvice, as we don't care about tracking any state changes (which // won't happen). return std::make_unique(this, CB, ORE, false); } CostEstimate = *IsCallSiteInlinable; } const auto CostFeatures = llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache); if (!CostFeatures) { return std::make_unique(this, CB, ORE, false); } if (Mandatory) return getMandatoryAdvice(CB, true); auto NrCtantParams = 0; for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { NrCtantParams += (isa(*I)); } auto &CallerBefore = FAM.getResult(Caller); auto &CalleeBefore = FAM.getResult(Callee); *ModelRunner->getTensor(FeatureIndex::CalleeBasicBlockCount) = CalleeBefore.BasicBlockCount; *ModelRunner->getTensor(FeatureIndex::CallSiteHeight) = FunctionLevels[&Caller]; *ModelRunner->getTensor(FeatureIndex::NodeCount) = NodeCount; *ModelRunner->getTensor(FeatureIndex::NrCtantParams) = NrCtantParams; *ModelRunner->getTensor(FeatureIndex::EdgeCount) = EdgeCount; *ModelRunner->getTensor(FeatureIndex::CallerUsers) = CallerBefore.Uses; *ModelRunner->getTensor( FeatureIndex::CallerConditionallyExecutedBlocks) = CallerBefore.BlocksReachedFromConditionalInstruction; *ModelRunner->getTensor(FeatureIndex::CallerBasicBlockCount) = CallerBefore.BasicBlockCount; *ModelRunner->getTensor( FeatureIndex::CalleeConditionallyExecutedBlocks) = CalleeBefore.BlocksReachedFromConditionalInstruction; *ModelRunner->getTensor(FeatureIndex::CalleeUsers) = CalleeBefore.Uses; *ModelRunner->getTensor(FeatureIndex::CostEstimate) = CostEstimate; // Add the cost features for (size_t I = 0; I < static_cast(InlineCostFeatureIndex::NumberOfFeatures); ++I) { *ModelRunner->getTensor(inlineCostFeatureToMlFeature( static_cast(I))) = CostFeatures->at(I); } return getAdviceFromModel(CB, ORE); } std::unique_ptr MLInlineAdvisor::getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) { return std::make_unique( this, CB, ORE, static_cast(ModelRunner->evaluate())); } std::unique_ptr MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, bool Advice) { // Make sure we track inlinings in all cases - mandatory or not. if (Advice && !ForceStop) return getMandatoryAdviceImpl(CB); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which // will do nothing interesting. // Same if we are forced to stop - we don't track anymore. return std::make_unique(this, CB, getCallerORE(CB), Advice); } std::unique_ptr MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { return std::make_unique(this, CB, getCallerORE(CB), true); } void MLInlineAdvice::reportContextForRemark( DiagnosticInfoOptimizationBase &OR) { using namespace ore; OR << NV("Callee", Callee->getName()); for (size_t I = 0; I < NumberOfFeatures; ++I) OR << NV(FeatureNameMap[I], *getAdvisor()->getModelRunner().getTensor(I)); OR << NV("ShouldInline", isInliningRecommended()); } void MLInlineAdvice::recordInliningImpl() { ORE.emit([&]() { OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); reportContextForRemark(R); return R; }); getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); } void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { ORE.emit([&]() { OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, Block); reportContextForRemark(R); return R; }); getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); } void MLInlineAdvice::recordUnsuccessfulInliningImpl( const InlineResult &Result) { ORE.emit([&]() { OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", DLoc, Block); reportContextForRemark(R); return R; }); } void MLInlineAdvice::recordUnattemptedInliningImpl() { ORE.emit([&]() { OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); reportContextForRemark(R); return R; }); } #endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)